Compare commits

441 Commits

Author SHA1 Message Date
Antonio Andelic
8cdcc431fe Fix 2024-09-16 13:30:17 +02:00
Antonio Andelic
187a717872 Merge branch 'master' into keeper-better-ssl-support 2024-09-16 09:17:30 +02:00
Robert Schulze
733c57dae7
Merge pull request #69583 from rschu1ze/better-fix-67476
A better fix for #67476
2024-09-15 09:35:37 +00:00
Nikita Taranov
918ead070a
Merge pull request #69602 from ClickHouse/rm_explicit_initial_announce_from_RFMT
Remove explicit announce from local replica in ReadFromMergeTree
2024-09-14 17:58:41 +00:00
Nikita Taranov
8fd9345d2d fix 2024-09-13 21:26:58 +01:00
Michael Stetsyuk
98a2c1c638
Merge pull request #69274 from ClickHouse/fix-metadata-version-in-zookeeper
fix `metadata_version` in ZooKeeper
2024-09-13 17:11:21 +00:00
Nikita Taranov
bab574d674
Merge pull request #69404 from bigo-sg/69135
`have_compressed` is lost in `HashJoin::reuseJoinedData`
2024-09-13 16:39:43 +00:00
Nikita Taranov
57a6a64d8c fix 2024-09-13 17:31:47 +01:00
Nikita Taranov
de78992966
Update tests/integration/test_parallel_replicas_snapshot_from_initiator/test.py
Co-authored-by: Igor Nikonov <954088+devcrafter@users.noreply.github.com>
2024-09-13 18:25:51 +02:00
Nikita Taranov
42670a46d4 impl 2024-09-13 17:00:46 +01:00
Antonio Andelic
9a31fc385d Fixes 2024-09-13 15:58:17 +02:00
Alexander Tokmakov
8111a32c72
Merge pull request #69539 from ClickHouse/evillique-patch-2
Fix attach of ReplicatedMergeTree tables in Replicated databases
2024-09-13 13:06:21 +00:00
Antonio Andelic
492461271b Merge branch 'master' into keeper-better-ssl-support 2024-09-13 14:44:12 +02:00
Michael Stetsyuk
721e9a7356 empty 2024-09-13 12:38:58 +00:00
Michael Stetsyuk
1b1db0081f do not fix metadata_version if replica is read_only 2024-09-13 12:38:25 +00:00
Robert Schulze
51f3245030
A better fix for #67476
Throw an exception instead of silently ignoring two conflicting settings.
2024-09-13 11:45:15 +00:00
Daniil Ivanik
2fce90ab76
Merge pull request #66782 from a-a-f/input_format_json_empty_as_default
Add support for the `input_format_json_empty_as_default` setting
2024-09-13 10:33:45 +00:00
Yarik Briukhovetskyi
e0c4a88f98
Merge pull request #69514 from aiven-sal/aiven-sal/showcolumns
Correctly handle tables' names with dots in SHOW COLUMNS and SHOW INDEX
2024-09-13 10:12:53 +00:00
János Benjamin Antal
d5c3adabde
Merge pull request #69301 from aohoyd/master
Add xml char escaping to docker's entrypoint
2024-09-13 10:11:30 +00:00
lgbo-ustc
08fd6c8ab6 have_compressed is lost in reuseJoinedData 2024-09-13 17:18:27 +08:00
Nikita Taranov
f330fdb1bf
Merge pull request #69406 from canhld94/parallel_join_limit_threads
Not retaining thread in concurrent hash join threadpool
2024-09-13 09:14:48 +00:00
Michael Kolupaev
e1a206c84d
Merge pull request #64953 from ClickHouse/tomic
Disallow creating refreshable MV on Linux < 3.15
2024-09-13 03:42:28 +00:00
Duc Canh Le
59763a937e Merge branch 'master' into parallel_join_limit_threads
Fix CI
2024-09-13 01:47:20 +00:00
Michael Kolupaev
7e99f05981 Merge remote-tracking branch 'origin/master' into tomic 2024-09-12 21:45:09 +00:00
Alexander Gololobov
09f22920d9
Merge pull request #69525 from ClickHouse/backport_mergetask
Backport MergeTask changes from private to minimize merge conflicts
2024-09-12 19:52:10 +00:00
Kruglov Pavel
8610a01745
Merge pull request #69147 from Avogar/fix-s3-cluster-structure
Fix propogating structure argument in s3Cluster
2024-09-12 17:24:26 +00:00
Antonio Andelic
f62cb32c5c
Merge pull request #69332 from ClickHouse/issues/68932/remove-recursive
Support removeRecursive natively in keeper
2024-09-12 17:14:34 +00:00
Kseniia Sumarokova
f50898ae98
Merge pull request #69513 from ClickHouse/s3queue-refactor
S3Queue: small refactoring
2024-09-12 16:56:59 +00:00
Yakov Olkhovskiy
d3533742d5
Merge pull request #68236 from marco-vb/marco-vb/x509-san-support-wildcard
Added support for wildcard usage in x509 SubjectAltName identification.
2024-09-12 16:00:35 +00:00
Nikita Taranov
54f0a8c0e2
Merge pull request #69554 from KevinyhZou/fix_settings_join_to_sort
Fix upgrade check failure for join_to_sort settings
2024-09-12 15:07:17 +00:00
Robert Schulze
c9dd409baa
Merge pull request #69549 from rschu1ze/qc-with-truncated-results
Disallow query cache for queries with non-throw overflow mode
2024-09-12 13:58:38 +00:00
kevinyhzou
2225389474 Fix upgrade check failed for join_to_sort settings 2024-09-12 20:10:42 +08:00
Alexander Gololobov
e15b86cd9d
Merge branch 'master' into backport_mergetask 2024-09-12 13:29:59 +02:00
Alexander Gololobov
bbcde19910
Merge pull request #69383 from ClickHouse/tmp_files_lookup
Refactor temporary file usage in MergeTask with QueryPlan
2024-09-12 10:57:15 +00:00
Ilya Yatsishin
98ef5f6e10
Merge pull request #69253 from ClickHouse/restore-replace-to-null-settings-fix
Restore external tables to Null supports no settings
2024-09-12 10:40:54 +00:00
kssenii
3e3a80e714 Merge remote-tracking branch 'origin/s3queue-refactor' into s3queue-refactor 2024-09-12 12:35:59 +02:00
kssenii
6e94c9f617 Fix 2024-09-12 12:34:56 +02:00
Marco Vilas Boas
185d9401e5
Merge branch 'ClickHouse:master' into marco-vb/x509-san-support-wildcard 2024-09-12 11:08:45 +01:00
Alexey Katsman
d7f271c658
Merge pull request #69269 from alexkats/external-data-checksum
Use another error code for external data in buffer
2024-09-12 09:38:09 +00:00
Robert Schulze
5fe5cc8611
Disallow query cache for queries with non-throw overflow mode 2024-09-12 09:21:27 +00:00
Mikhail Artemenko
494455e4eb Merge branch 'master' into issues/68932/remove-recursive 2024-09-12 08:51:41 +00:00
Alexander Gololobov
8ea2573a9d Merge branch 'master' of github.com:ClickHouse/ClickHouse into backport_mergetask 2024-09-12 09:31:54 +02:00
Alexander Gololobov
856a66d45f Merge branch 'master' of github.com:ClickHouse/ClickHouse into tmp_files_lookup 2024-09-12 09:26:36 +02:00
Yakov Olkhovskiy
fd53b39352
bump 2024-09-12 02:33:15 -04:00
Mikhail f. Shiryaev
8a70017815
Merge pull request #69538 from ClickHouse/fix-latest-sync-commit
Get rid of broken `get_commits().reversed`
2024-09-12 05:34:35 +00:00
Yakov Olkhovskiy
c124533ef9
fix 2024-09-12 00:08:53 -04:00
Yakov Olkhovskiy
98c0f82f28
make it compatible with multiple runs 2024-09-11 20:45:54 -04:00
robot-clickhouse
1da5729f89 Automatic style fix 2024-09-11 21:08:43 +00:00
Nikolay Degterinsky
8e3ba4bd6c
Add test 2024-09-11 22:59:06 +02:00
Igor Nikonov
56b39c37a1
Merge pull request #64448 from ClickHouse/pr-local-plan
Local plan for parallel replicas
2024-09-11 20:44:37 +00:00
Nikolay Degterinsky
b30aabf635
Fix attach of ReplicatedMergeTree tables in Replicated databases 2024-09-11 22:38:54 +02:00
Mikhail f. Shiryaev
370ce5ef63
Get rid of broken get_commits().reversed 2024-09-11 22:37:30 +02:00
Julia Kartseva
b5289c1f08
Merge pull request #60341 from KevinyhZou/improve_join_insert_from
Improve left/inner join performance by rerange right table by keys
2024-09-11 19:04:41 +00:00
Kseniia Sumarokova
b216e1a74b
Merge branch 'master' into s3queue-refactor 2024-09-11 20:56:11 +02:00
Yatsishin Ilya
ccc6390a9e merge remote-tracking branch 'origin/master' into restore-replace-to-null-settings-fix 2024-09-11 18:23:07 +00:00
Antonio Andelic
de4774be74
Merge pull request #69469 from yariks5s/squashing_mem_usage
Fix high memory consumption on Squashing
2024-09-11 16:52:19 +00:00
robot-clickhouse
c0a42ef877 Automatic style fix 2024-09-11 15:42:12 +00:00
Alex Katsman
9a2a6d71d2 Fix check in test_broken_projections 2024-09-11 15:32:26 +00:00
Kseniia Sumarokova
abe8f29bf8
Update test 2024-09-11 17:32:20 +02:00
Alexander Gololobov
c3416859d5 Merge branch 'master' of github.com:ClickHouse/ClickHouse into tmp_files_lookup 2024-09-11 17:22:08 +02:00
Alex Katsman
22ceaa0066 Use another error code for external data in buffer
Use CANNOT_DECOMPRESS error code for the compressed external data in
CompressedReadBufferBase for checksum validation and decompression.
2024-09-11 14:59:31 +00:00
Alexander Gololobov
f69cb73df0
Merge pull request #69167 from ClickHouse/query_plan_for_merge
Use QueryPlan for merge
2024-09-11 14:41:25 +00:00
Robert Schulze
1ce4d6d8e5
Merge pull request #69511 from rschu1ze/icu-comment
CMake: Add comment about ICU data files
2024-09-11 13:58:18 +00:00
Alexander Gololobov
3aef79b5a2 Backport changes from private to minimize merge conflicts 2024-09-11 15:45:55 +02:00
Salvatore Mesoraca
dca72d2e0b Add tests for SHOW INDEX from tables with dots 2024-09-11 15:39:02 +02:00
Salvatore Mesoraca
146edca4f7 Add tests for SHOW COLUMNS from tables with dots 2024-09-11 15:35:48 +02:00
kssenii
a05610c38f Move creation and validation of table metadata before creating queue metadata & save to factory 2024-09-11 15:32:22 +02:00
Antonio Andelic
9b42af7263
Merge pull request #66648 from ClickHouse/read-cgroup-memory-usage-async-metrics
Simplify some memory tracking parts
2024-09-11 13:24:38 +00:00
Salvatore Mesoraca
3556cf92c3 Allow SHOW INDEXES to work with tables that contain dots in the name 2024-09-11 15:15:31 +02:00
Salvatore Mesoraca
075a85f15c Allow SHOW COLUMNS to work with tables that contain dots in the name 2024-09-11 15:15:08 +02:00
Robert Schulze
9e335abe17
CMake: Add comment about ICU data files 2024-09-11 13:06:17 +00:00
Igor Nikonov
c53e165118
Merge branch 'master' into pr-local-plan 2024-09-11 14:03:35 +02:00
Kseniia Sumarokova
4e0b3f8415
Merge pull request #69432 from ClickHouse/add-documnetation-for-azure-queue
Add azure-queue.md
2024-09-11 12:02:19 +00:00
Mikhail f. Shiryaev
22f389d415
Merge pull request #69483 from ClickHouse/logs-export-improvement
Another attempt to address EAGAIN "Resource unavailable"
2024-09-11 13:47:38 +02:00
Kseniia Sumarokova
6f9d1631e2
Update aspell-ignore 2024-09-11 13:11:29 +02:00
Mikhail f. Shiryaev
c1830bc041
Escape the \n in statements 2024-09-11 13:08:58 +02:00
Nikita Taranov
bdddab6a1f
Merge pull request #69449 from ClickHouse/fix_test2
Fix test_disks_app_func
2024-09-11 11:07:18 +00:00
Yarik Briukhovetskyi
af469ffde4
remove sanitizer builds 2024-09-11 12:32:36 +02:00
Mikhail f. Shiryaev
3921f910f5
Another attempt to address EAGAIN "Resource unavailable" 2024-09-11 12:14:16 +02:00
Mikhail Artemenko
55d6672bc4 add early break for test keeper impl 2024-09-11 10:00:40 +00:00
Yarik Briukhovetskyi
fd3bd785b8
Update 03236_squashing_high_memory.sql 2024-09-11 11:17:06 +02:00
Yarik Briukhovetskyi
ccfb7ad38c
Merge pull request #69455 from yariks5s/group_concat_return_string
`groupConcat` consistency with `arrayStringConcat`
2024-09-11 08:59:48 +00:00
Robert Schulze
7a3d69b626
Merge pull request #69451 from rschu1ze/sqid-crash
Fix crash in `sqidDecode`
2024-09-11 08:56:05 +00:00
Duc Canh Le
085be6bf59 remove unwanted include
Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
2024-09-11 06:42:48 +00:00
Yarik Briukhovetskyi
3d36f6dce3
Update 03236_squashing_high_memory.sql 2024-09-10 23:48:32 +02:00
Kseniia Sumarokova
cb1b7dc43c
Merge pull request #69454 from ClickHouse/revert-s3queue-problem
Quick fix for s3queue problem
2024-09-10 21:13:16 +00:00
Alexander Tokmakov
629d27fec8
Merge pull request #69422 from ClickHouse/add_a_setting
Add a setting to disallow DETACH PERMANENTLY in Replicated database
2024-09-10 19:56:10 +00:00
Michael Kolupaev
263cd9e1c3 Fix error message 2024-09-10 19:41:15 +00:00
Michael Kolupaev
5539faa54e Disallow creating refreshable MV on Linux < 3.15 2024-09-10 19:41:15 +00:00
Alexander Gololobov
d4aa06524b Add materialization when building indices in vertical merge 2024-09-10 20:57:55 +02:00
Alexander Gololobov
2f15fcd23f Test with sparse serialization, vertical merge and skip indices 2024-09-10 20:57:03 +02:00
Yarik Briukhovetskyi
63aebfa5d7
Update 03236_squashing_high_memory.sql 2024-09-10 20:15:36 +02:00
Yarik Briukhovetskyi
9b517a939f
Update 03236_squashing_high_memory.sql 2024-09-10 19:53:16 +02:00
Mikhail Artemenko
3e3d652320 refactor tests 2024-09-10 17:52:41 +00:00
Mikhail Artemenko
00fe1d12ee add acl test for rmr 2024-09-10 17:45:39 +00:00
Mikhail Artemenko
3730582d06 add full tree acl check 2024-09-10 17:43:35 +00:00
Antonio Andelic
df1821a579
Update tests/queries/0_stateless/03236_squashing_high_memory.sql 2024-09-10 19:32:28 +02:00
Igor Nikonov
c3dfdd41d3 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-10 17:24:44 +00:00
Mikhail f. Shiryaev
d093dd2c71
Merge pull request #69416 from ClickHouse/logs-export-improvement
Hide exported logs statements in GH logs
2024-09-10 19:19:47 +02:00
Nikita Taranov
8534aa07fc
Merge pull request #69400 from ClickHouse/support_more_oss_endpoints
Support more oss endpoints
2024-09-10 17:01:02 +00:00
Nikita Taranov
8fa31c0750
Merge pull request #69385 from ClickHouse/fix
Fix type mismatch
2024-09-10 16:52:57 +00:00
Alexander Tokmakov
b3a5101f34
Merge pull request #69430 from ClickHouse/tavplubix-patch-10
Update 03232_pr_not_ready_set.sql
2024-09-10 16:43:22 +00:00
Alexander Gololobov
bb82465a5f Merge branch 'master' of github.com:ClickHouse/ClickHouse into query_plan_for_merge 2024-09-10 18:27:59 +02:00
Yarik Briukhovetskyi
caab4dd8b9
fix 2024-09-10 18:20:48 +02:00
Kseniia Sumarokova
6a6d26aeed
Update StorageObjectStorageQueue.cpp 2024-09-10 18:16:27 +02:00
Kseniia Sumarokova
b7e863b0e9
Update docs/en/engines/table-engines/integrations/azure-queue.md
Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
2024-09-10 18:13:04 +02:00
Kseniia Sumarokova
dcd1874520
Update docs/en/engines/table-engines/integrations/azure-queue.md
Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
2024-09-10 18:12:56 +02:00
Mikhail f. Shiryaev
7727b30f5c
Attempt to fix EAGAIN error 2024-09-10 18:06:32 +02:00
Mikhail Artemenko
8b7a5616a2 update test 2024-09-10 15:51:31 +00:00
Alexander Gololobov
e4761d40ba
Merge pull request #69429 from ClickHouse/materialize_sparse
Make `materialize()` function return full column when parameter is a sparse column
2024-09-10 15:49:29 +00:00
Robert Schulze
080193cfc3
14% more aesthetic code 2024-09-10 15:46:21 +00:00
Yarik Briukhovetskyi
f588e3c31b
rename tests 2024-09-10 17:43:50 +02:00
Yarik Briukhovetskyi
d8e670297b
groupConcat consistency 2024-09-10 17:42:27 +02:00
kssenii
d43264c44e Quick fix for s3queue problem 2024-09-10 17:41:52 +02:00
Robert Schulze
04dcf73e8f
Fix crash in sqidDecode 2024-09-10 15:35:59 +00:00
Kruglov Pavel
d226b13fd4
Merge pull request #69272 from Avogar/json-memory-usage
Reduce memory usage of inserts to JSON by using adaptive write buffer size
2024-09-10 15:23:18 +00:00
Nikita Taranov
acbeaa6d33 fix 2024-09-10 16:14:53 +01:00
Mikhail Artemenko
1aacb48bcf fix collector 2024-09-10 14:34:27 +00:00
Pablo Marcos
a34a544f4a
Merge pull request #67938 from mwoenker/incomplete-utf8-sequence
Handle incomplete sequences at end of input
2024-09-10 14:04:58 +00:00
Michael Stetsyuk
be55e1d2e1 better 2024-09-10 13:52:40 +00:00
kssenii
6719112b02 Add azure-queue.md 2024-09-10 15:10:53 +02:00
Alexander Tokmakov
30cf6ee711
Update 03232_pr_not_ready_set.sql 2024-09-10 14:30:07 +02:00
Mikhail Artemenko
0bd7ef39c4 unify processWatches 2024-09-10 12:27:34 +00:00
Mikhail Artemenko
0453d5e91a small fixes 2024-09-10 12:26:03 +00:00
Alexander Gololobov
a01229bba2 Convert sparse columns to full in materialize() 2024-09-10 14:23:31 +02:00
Alexander Gololobov
41aaf07537 Test materialize() on sparse columns 2024-09-10 14:23:19 +02:00
Nikita Taranov
5ef98cd275
Merge pull request #69413 from ClickHouse/fix_test
Fix 01603_read_with_backoff_bug
2024-09-10 12:20:12 +00:00
Antonio Andelic
bdf8c859b1 Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-09-10 14:13:48 +02:00
Alexander Tokmakov
89dd3188bb add a setting to disallow DETACH PERMANENTLY in Replicated 2024-09-10 13:35:18 +02:00
Nikita Fomichev
24d81fe00b
Merge pull request #69394 from ClickHouse/rows-before-aggregation-compatibility
Fix 24.8 setting compatibility `rows_before_aggregation`
2024-09-10 11:33:33 +00:00
Robert Schulze
4d0941072f
Merge pull request #69367 from rschu1ze/bump-libarchive
Bump libarchive to v3.7.4
2024-09-10 11:09:37 +00:00
Igor Nikonov
b0c4f6b9e7 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-10 11:06:10 +00:00
Mikhail f. Shiryaev
6e6e705486
Add a hostname to log; we need something to grep remote query_log 2024-09-10 12:56:19 +02:00
Sergei Trifonov
db42cf12fd
Merge pull request #68933 from Blargian/docs_toRelativeXYZ
[Docs] update `toReference*Num` docs
2024-09-10 10:48:24 +00:00
Mikhail f. Shiryaev
8b1f21e7dd
Hide exported logs statements in GH logs 2024-09-10 12:22:19 +02:00
Pablo Marcos
ac3cfef8c1
Merge pull request #69390 from pamarcos/fix-invalid-connection-logical-error
Fix undefined behavior if all connection tries fail
2024-09-10 09:45:37 +00:00
Nikita Taranov
49ea016a3d impl 2024-09-10 10:19:23 +01:00
Daniil Ivanik
25a5768adc
Merge pull request #68952 from Blargian/docs_toDateTime64XYZ
[Docs] update docs for `DateTime64OrZero/Null/Default` functions
2024-09-10 09:15:34 +00:00
Daniil Ivanik
8263815451
Merge pull request #69007 from jww0924/patch-1
[Docs] Add CKibana as third-party GUI
2024-09-10 09:14:37 +00:00
alesapin
41798be63f
Merge pull request #69339 from ClickHouse/cp-mv-docs
Docs: Add cp/mv to keeper client docs
2024-09-10 09:10:32 +00:00
kevinyhzou
597181c45e review 2024-09-10 16:32:52 +08:00
Duc Canh Le
d089a56272 only set max_free_threads_ = 0 when creating pool
Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
2024-09-10 05:51:48 +00:00
Duc Canh Le
11478b949f limit threadpool size in concurrent hash join
Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
2024-09-10 03:12:44 +00:00
Robert Schulze
6464d47d34
Bump to v3.7.4 2024-09-09 20:33:20 +00:00
Robert Schulze
e252bdc30d
Bump to v3.7.3 2024-09-09 20:27:10 +00:00
Igor Nikonov
bb063babff
Merge branch 'master' into pr-local-plan 2024-09-09 21:17:51 +02:00
Nikita Taranov
47589d4898 impl 2024-09-09 19:53:58 +01:00
Alexander Gololobov
38b8edae97 Fix the case with empty temp file 2024-09-09 19:34:59 +02:00
avogar
72dbc8205b Fix unit tests 2024-09-09 15:12:17 +00:00
Pablo Marcos
f7dee10030 Make a more assertive exception text 2024-09-09 14:38:06 +00:00
Alexander Gololobov
4da1e10ac6 Move sorting key calculation step outside the loop 2024-09-09 16:01:00 +02:00
Pablo Marcos
67b57eb89f Fix undefined behavior if all connection tries fail
For some reason, getManyCheckedForInsert was calling getManyImpl with
skip_unavailable_endpoints=nullptr, which resulted in getManyImpl using
the `skip_unavailable_shards` settings. Since this is true by default,
min_entries was set to 0.

Having min_entries set to 0 while using PoolMode::GET_ONE was strange,
to say the least. There was an edge case where if all connection
attempts failed and min_entries was 0, it was returning an empty vector.
That was not considered to be possible by the caller and it was getting
the front entry of an empty vector, causing undefined behavior.

Conclusion: set `skip_unavailable_endpoints=false` explicitly in
getManyCheckedForInsert so that min_entries=1. In case all connections
fail, an exception will be thrown. Also, add some defensive programming
to ensure we never ever try to get the front element of an empty vector.
2024-09-09 14:00:35 +00:00
Pablo Marcos
e6e79c3c4a Fix minor typos 2024-09-09 13:35:45 +00:00
Alexander Gololobov
8c1f434b1a Do column materialization using ActionsDAG::addMaterializingOutputActions instead of a special step 2024-09-09 15:31:43 +02:00
Alexander Gololobov
1bcc4ba823 Renamed ApplyMergeStep into MergePartsStep 2024-09-09 15:30:19 +02:00
Nikita Fomichev
66572aa029 Fix 24.8 setting compatibility rows_before_aggregation
https://github.com/ClickHouse/ClickHouse/pull/66084
2024-09-09 15:25:14 +02:00
imddba
942d0fab4a
spelling fixes 2024-09-09 20:24:16 +08:00
Nikita Taranov
43d057ed0f impl 2024-09-09 13:04:45 +01:00
Alexander Gololobov
5a8bd5b4f5 Encapsulte and cleanup "rows_sources" temp file usage 2024-09-09 13:22:38 +02:00
Alexander Gololobov
7d042be8eb A simple interface to find temporary files by their logical names when building pipeline from query plan 2024-09-09 13:21:00 +02:00
Igor Nikonov
eda6dabe41 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-09 09:58:36 +00:00
Mikhail Artemenko
08368d1d65 update keeper client docs 2024-09-09 09:46:17 +00:00
Mikhail Artemenko
6309f36232 add limit option to rmr command 2024-09-09 09:44:19 +00:00
Mikhail Artemenko
2fe6991124 Merge branch 'master' into issues/68932/remove-recursive 2024-09-09 08:55:53 +00:00
Robert Schulze
483dd7eebe
Bump to v3.7.2 2024-09-08 08:55:59 +00:00
Robert Schulze
33866fb5bd
Fix config.h for v3.7.1 2024-09-08 08:38:05 +00:00
Igor Nikonov
42d3cd2b91 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-06 20:10:50 +00:00
Igor Nikonov
ebd9c294aa Fix build 2024-09-06 20:09:48 +00:00
Igor Nikonov
b436057cba Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-06 18:46:42 +00:00
Mikhail Artemenko
3431e2ec4a add dest to aspell ignore 2024-09-06 16:52:31 +00:00
Mikhail Artemenko
9d07d002f1 update keeper client docs 2024-09-06 16:20:30 +00:00
Mikhail Artemenko
81972b97e7 support test keeper 2024-09-06 13:23:57 +00:00
Mikhail Artemenko
068ada57ba update api 2024-09-06 12:23:57 +00:00
Mikhail Artemenko
cf0e0b766d add test for watches 2024-09-06 09:34:26 +00:00
Mikhail Artemenko
2bbe933531 fix watches logic 2024-09-06 09:33:47 +00:00
Mikhail Artemenko
231a7c97cc move code 2024-09-06 08:34:59 +00:00
kevinyhzou
02e129f0d1 modify test 2024-09-06 10:15:43 +08:00
kevinyhzou
f8b6025e07 rename to allow_experimental_join_right_table_sorting and modify comments 2024-09-06 09:43:32 +08:00
Mikhail Artemenko
aa2721f7e4 update tests 2024-09-06 00:09:46 +00:00
Mikhail Artemenko
cb1c11c74a change traverse 2024-09-06 00:08:08 +00:00
Alexander Gololobov
2df5edc1c1 Merge branch 'master' of github.com:ClickHouse/ClickHouse into query_plan_for_merge 2024-09-05 20:58:47 +02:00
Mikhail Artemenko
c3cc2a3fb1 intro new request type 2024-09-05 16:14:50 +00:00
Michael Stetsyuk
c9aedee24f
Merge branch 'master' into fix-metadata-version-in-zookeeper 2024-09-05 16:52:56 +01:00
Michael Stetsyuk
32cfdc98b2 fix metadata_version in keeper 2024-09-05 15:51:37 +00:00
Alexey Olshanskiy
d17b20705a
Add char escaping to docker's entrypoint 2024-09-05 17:55:56 +03:00
Igor Nikonov
536e0808e3
Merge branch 'master' into pr-local-plan 2024-09-05 16:19:47 +02:00
Alexander Gololobov
20eaecc4f3
Fix build 2024-09-05 13:50:26 +02:00
Alexander Gololobov
e1b2952a60
Merge branch 'master' into query_plan_for_merge 2024-09-05 13:32:12 +02:00
Mikhail Artemenko
6455e1dfa1 add ephemerals check 2024-09-05 11:14:43 +00:00
Mikhail Artemenko
ae512fe533 add test for single delete 2024-09-05 11:05:23 +00:00
Mikhail Artemenko
5180e58dca fix collector 2024-09-05 11:01:37 +00:00
avogar
b4ef10ad1c Make better 2024-09-05 08:52:22 +00:00
kevinyhzou
d14e03abad fix tests incompatible and add new test example 2024-09-05 09:54:27 +08:00
Mikhail Artemenko
c6777af485 add remove recursive support 2024-09-04 22:59:18 +00:00
Yatsishin Ilya
ef2dd93418 crazy stuff 2024-09-04 19:43:39 +00:00
avogar
b7b88737ad Treat dynamic/object structure streams as dynamic 2024-09-04 18:37:33 +00:00
avogar
0dad8b088a Fix typo 2024-09-04 18:35:10 +00:00
avogar
68a8b5a3a1 Better 2024-09-04 18:22:20 +00:00
avogar
4b322ee3c5 Reduce memory usage of inserts to JSON by using adaptive write buffer size 2024-09-04 17:12:17 +00:00
Yatsishin Ilya
dff153b59e Null supports no settings 2024-09-04 11:48:22 +00:00
avogar
f926a0fff7 Fix tidy build 2024-09-04 11:25:22 +00:00
Igor Nikonov
3898f52868 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-04 11:15:54 +00:00
Mikhail Artemenko
6c57adee7c add remove recursive entry point 2024-09-04 10:38:03 +00:00
kevinyhzou
49548ed4d5 update the description 2024-09-04 16:40:46 +08:00
kevinyhzou
dbf6e6c85e review fix 2024-09-04 16:05:13 +08:00
Igor Nikonov
fc1228e3a4 Update settings history 2024-09-03 20:45:43 +00:00
Igor Nikonov
8629f7e592
Merge branch 'master' into pr-local-plan 2024-09-03 22:43:20 +02:00
Alexander Gololobov
472e6eb856 typo 2024-09-03 17:16:43 +02:00
Alexander Gololobov
8361724539 Build pipeline for next column for prefetching 2024-09-03 17:02:25 +02:00
Alexander Gololobov
a1cec53b7c Fix updateOutputStream and Traits 2024-09-03 14:57:33 +02:00
Alexander Gololobov
6a6935cb84 Cleanup 2024-09-03 14:57:19 +02:00
marco-vb
d6ea08e812 Fixed documentation related to wildcard support. 2024-09-03 12:47:20 +00:00
Igor Nikonov
d0d2509c69
Merge branch 'master' into pr-local-plan 2024-09-03 13:31:36 +02:00
Kruglov Pavel
cec5037c4d
Update Configuration.h 2024-09-03 12:05:26 +02:00
marco-vb
919f51533a Fixed style issues. 2024-09-03 09:37:03 +00:00
Alexander Gololobov
d28cba981c Fix clang_tidy 2024-09-03 08:59:01 +02:00
Alexander Gololobov
48cacd6f31 Use query plan for column vertical merges 2024-09-02 22:36:42 +02:00
Alexander Gololobov
13f4eb3fac Fix for graphite merge mode 2024-09-02 22:24:53 +02:00
avogar
b7e1dda2e0 Remove unneded code 2024-09-02 20:13:44 +00:00
avogar
d281333db2 Better process of object storage arguments 2024-09-02 20:11:00 +00:00
marco-vb
547276780c Removed unnecessary code and restored test certificates. 2024-09-02 17:14:10 +00:00
Alexander Gololobov
7e444136bb Use QueryPlan for horizontal part of merge 2024-09-02 18:27:24 +02:00
Igor Nikonov
db8ce31bb7 Increase flaky check timeout 2024-09-02 14:08:22 +00:00
Igor Nikonov
d064692125 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-09-02 14:06:46 +00:00
Kruglov Pavel
0cb8c9f148
Fix typo 2024-09-02 14:58:09 +02:00
avogar
d7aaf053f9 Fix propogating structure argument in s3Cluster 2024-09-02 12:50:12 +00:00
Igor Nikonov
1d1f41521e
Merge branch 'master' into pr-local-plan 2024-09-02 11:07:07 +02:00
Igor Nikonov
bfa27d6650 Fix 02500_remove_redundant_distinct 2024-08-31 06:11:50 +00:00
Igor Nikonov
81b8f8594c Fix tests 2024-08-30 21:23:07 +00:00
Igor Nikonov
5b4b08b711 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-30 20:06:03 +00:00
Igor Nikonov
3c29f27dd2 Fix 03228_pr_subquery_view_order_by 2024-08-30 20:04:43 +00:00
imddba
ad08db39e5
Add CKibana as third-party GUI 2024-08-28 20:05:06 +08:00
Kruglov Pavel
2c0ddd10a0
Fix build 2024-08-28 13:49:48 +02:00
Blargian
6febfc78b3 updates docs for DateTime64OrZero/Null/Default 2024-08-27 14:52:46 +02:00
Blargian
29664d12fc update example of toTime 2024-08-27 12:44:28 +02:00
Blargian
c1c4cabe9f Update toReference*Num function documentation 2024-08-27 12:41:02 +02:00
Kruglov Pavel
bf4b53390b
Merge branch 'master' into input_format_json_empty_as_default 2024-08-26 13:40:48 +02:00
Igor Nikonov
d12aac7d12 Test 03228_pr_subquery_view_order_by
PR + subquery with view and order by
2024-08-23 20:11:33 +00:00
Igor Nikonov
17c1e82bc0 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-23 18:27:19 +00:00
Kruglov Pavel
34d02304d3
Merge branch 'master' into input_format_json_empty_as_default 2024-08-21 14:50:36 +02:00
kevinyhzou
add486b62a rebase and reslove conflict 2024-08-20 17:33:08 +08:00
kevinyhzou
cfa4ca6fb1 remove useless code 2024-08-20 15:56:34 +08:00
kevinyhzou
b8e967ff9c add allowReadCaseInsentitive func 2024-08-20 15:56:34 +08:00
kevinyhzou
29c94195e1 add setting tests/performance/all_join_opt.xml 2024-08-20 15:56:34 +08:00
kevinyhzou
37c3f4a870 add threshold for table rows 2024-08-20 15:55:06 +08:00
Igor Nikonov
9fe65f750e Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-17 14:44:02 +00:00
Igor Nikonov
6e0a74f74d Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-16 19:35:19 +00:00
Igor Nikonov
9dc288faa8 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-16 11:07:35 +00:00
Igor Nikonov
29fffd34ac Fix: check if table expression is present 2024-08-16 11:06:20 +00:00
Igor Nikonov
49fc4cb87b Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-15 20:48:56 +00:00
Igor Nikonov
5d9f105fe3 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-15 14:34:15 +00:00
Igor Nikonov
b4e8c14729 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-15 11:12:56 +00:00
Igor Nikonov
a3978b24d0 Fix for subqueries with FINAL 2024-08-15 11:11:49 +00:00
Alexis Arnaud
29bc7cf5d5 post-review changes 2024-08-14 12:03:29 +02:00
Marco Vilas Boas
0a1f61fcbd
Merge branch 'ClickHouse:master' into marco-vb/x509-san-support-wildcard 2024-08-14 10:15:30 +01:00
Igor Nikonov
1a8f45464c Fix min_marks_to_read overflow 2024-08-13 21:05:29 +00:00
Igor Nikonov
62e45a4c8f Test 03222_parallel_replicas_min_marks_to_read_overflow 2024-08-13 21:05:15 +00:00
Alexis Arnaud
a39a4b1080 better 2024-08-13 18:30:43 +02:00
Alexis Arnaud
229fffcd56 post-review changes 2024-08-13 18:30:43 +02:00
Alexis Arnaud
3c586d80c8 post-rebase fixes 2024-08-13 18:30:43 +02:00
Alexis Arnaud
0ff90e4a5c post-review changes 2024-08-13 18:30:43 +02:00
Alexis Arnaud
5d6d378f24 renumbered tests 2024-08-13 18:30:43 +02:00
Alexis Arnaud
14dad25adc trigger build 2024-08-13 18:30:43 +02:00
Alexis Arnaud
906a181b97 fix for clang-tidy 2024-08-13 18:30:43 +02:00
Alexis Arnaud
7b09ec9ccb added input_format_json_empty_as_default to setting changes history 2024-08-13 18:30:43 +02:00
Alexis Arnaud
d5bea37c96 post-review changes 2024-08-13 18:30:43 +02:00
Alexis Arnaud
ad24989b31 slightly better comment 2024-08-13 18:30:43 +02:00
Alexis Arnaud
d36176ad85 Support for the input_format_json_empty_as_default setting 2024-08-13 18:30:43 +02:00
Igor Nikonov
5339717c11 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-13 16:16:31 +00:00
Igor Nikonov
4448880bbe Remove settings 03222_pr_final_not_supported 2024-08-13 16:15:28 +00:00
Igor Nikonov
99dcf7e60e Add 03222_pr_final_not_supported.sql 2024-08-13 15:57:09 +00:00
Igor Nikonov
e42b58082b Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-13 14:30:33 +00:00
Igor Nikonov
8d6e2e26a5 New test for asan issue with pr 2024-08-13 14:29:52 +00:00
marco-vb
2068fbb157 Added support for single wildcard usage in x509 SubjectAltName user identification (both DNS and URI). 2024-08-12 15:39:52 +00:00
Igor Nikonov
b6f5eb1211 Fix screwed merge 2024-08-11 23:15:19 +00:00
Igor Nikonov
d04db7e26d Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-11 20:11:32 +00:00
Antonio Andelic
0d38aa98ce Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-08-09 15:44:47 +02:00
Matt Woenker
f8a14e86d8 Add const a few places 2024-08-07 16:32:13 -04:00
Matt Woenker
d09531e48a Rename test files 2024-08-07 16:25:01 -04:00
Matt Woenker
cff7cbac94 Style fix 2024-08-07 16:21:16 -04:00
Matt Woenker
f4ed3f5c6d Add test 2024-08-06 16:14:26 -04:00
Matt Woenker
2a1ee419b4 Fix style issue 2024-08-06 15:53:49 -04:00
Matt Woenker
704ec8dea6 Handle incomplete sequences at end of input 2024-08-06 12:49:23 -04:00
Igor Nikonov
36036c3da9 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-01 20:33:41 +00:00
Igor Nikonov
85b8c11175 Fix flaky 0_stateless/02784_parallel_replicas_automatic_decision
- avoid max_threads randomzation
2024-08-01 20:32:03 +00:00
Igor Nikonov
7082a9797e Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-08-01 14:22:26 +00:00
Igor Nikonov
c5a2f3aafb Limit min_marks_for_concurrent_read 2024-08-01 14:21:54 +00:00
Igor Nikonov
936e3d9a21 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-31 11:04:01 +00:00
Igor Nikonov
0705bbb42b Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-30 16:59:07 +00:00
Igor Nikonov
7d443c6b97 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-30 16:27:50 +00:00
Igor Nikonov
bce0f46254 Simplify tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh
+ enable parallel_replicas_local_plan by default
2024-07-30 16:26:01 +00:00
Igor Nikonov
d711ec118c Fix build 2024-07-30 09:04:59 +00:00
Igor Nikonov
5ffa54bd70 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-30 08:00:08 +00:00
Igor Nikonov
9501bc8a75 Fix forgotten conflict 2024-07-29 20:33:06 +00:00
Igor Nikonov
cf27f2665e Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-29 20:00:55 +00:00
Igor Nikonov
ce134830b7 Disable pr local plan by default 2024-07-29 17:15:15 +00:00
Antonio Andelic
91e0df33db Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-26 15:55:42 +02:00
Antonio Andelic
71a5a42f8f Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-26 11:17:54 +02:00
Antonio Andelic
f449c2fea0 Fix 2024-07-25 08:45:08 +02:00
Igor Nikonov
420075ada0 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-24 19:51:47 +00:00
Antonio Andelic
e2e4c8ee0f Better 2024-07-24 10:21:09 +02:00
Antonio Andelic
7e026aec8b Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-24 09:55:25 +02:00
Antonio Andelic
ec5459a60d Update allocated with resident if no jemalloc 2024-07-24 09:15:34 +02:00
Igor Nikonov
f49412e967 Change local replica position in parallel_replicas cluster 2024-07-23 17:16:40 +00:00
Igor Nikonov
87ad12e5fc Fix typo 2024-07-23 17:16:11 +00:00
Igor Nikonov
6a6147a97f Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-23 15:44:18 +00:00
Igor Nikonov
f5630f86e4 Fix: resize pools_to_use correctly (keep local replica in it) 2024-07-23 15:43:25 +00:00
Antonio Andelic
04d80ec276 Fix non-linux build 2024-07-23 16:13:07 +02:00
Antonio Andelic
9d92bcff46 Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-23 16:09:21 +02:00
Igor Nikonov
217e50d12e Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-23 13:38:28 +00:00
Igor Nikonov
842b51c782 Try assigning replica numbers consistently independent of initiator 2024-07-23 13:30:40 +00:00
Antonio Andelic
ade79cfd7a More fixes 2024-07-23 12:27:08 +02:00
Antonio Andelic
5b51a35e01 Add unused variables 2024-07-23 11:31:34 +02:00
Antonio Andelic
5a1b96ac84 Style fix 2024-07-23 10:52:14 +02:00
Antonio Andelic
d78cfd030f Use cgroups as source 2024-07-23 10:33:12 +02:00
Igor Nikonov
fbaca99e3a Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-23 06:38:58 +00:00
Igor Nikonov
420716d702 Fix clang-tidy warning 2024-07-23 06:38:37 +00:00
Igor Nikonov
4483f4b389 Better logging for announcement 2024-07-22 16:39:07 +00:00
Igor Nikonov
9900abade6 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-22 13:45:31 +00:00
Igor Nikonov
fa0f4543b6 Fix marks multiplier for local replica to avoid overflow with fuzzer 2024-07-22 13:44:25 +00:00
Antonio Andelic
1c3f7d0fd0 Small fix 2024-07-22 15:18:13 +02:00
Antonio Andelic
a3840179cc Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-22 12:58:36 +02:00
Antonio Andelic
1d1dc9c7ad
Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-21 10:37:01 +01:00
Antonio Andelic
2147a96475 Better 2024-07-21 11:32:57 +02:00
Igor Nikonov
4753f4cb33 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-20 20:54:11 +00:00
Igor Nikonov
4e69cd0d52 Avoid getting callbacks from context for local plan 2024-07-20 20:53:18 +00:00
Igor Nikonov
daabf2275e Remove wrong comment 2024-07-19 21:46:14 +00:00
Igor Nikonov
75f7816c46 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-19 08:24:54 +00:00
Igor Nikonov
6db09e884b Simplify code 2024-07-19 08:24:29 +00:00
Antonio Andelic
7d66f400b2 Better 2024-07-18 17:40:17 +02:00
Igor Nikonov
7ad07657a3 Fix 2024-07-18 14:16:44 +00:00
Igor Nikonov
7523dafc06 Fix after incorrect merge conflict resolution 2024-07-18 13:09:07 +00:00
Igor Nikonov
e47fe15968 Simplify condition 2024-07-18 12:22:03 +00:00
Antonio Andelic
05c7dc582a Add some comments 2024-07-18 13:40:03 +02:00
Igor Nikonov
e438f58f21 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-18 11:34:04 +00:00
Antonio Andelic
9ec1fd1ab7 Fix non-jemalloc builds 2024-07-18 12:13:44 +02:00
Antonio Andelic
9a43183eb3 Finish background memory thread 2024-07-18 10:40:47 +02:00
Antonio Andelic
c413e7a494 Merge branch 'master' into read-cgroup-memory-usage-async-metrics 2024-07-18 09:01:54 +02:00
Antonio Andelic
21009577d8 Dedicated memory background thread 2024-07-18 09:01:20 +02:00
Antonio Andelic
97f4ec2adb Read cgroup memory usage in async metrics thread 2024-07-17 16:59:35 +02:00
Igor Nikonov
7fba824ed2 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-17 11:45:34 +00:00
Igor Nikonov
baf3408b16 Fix: fallback to local plan w/o PR in case of empty table 2024-07-17 11:45:00 +00:00
Igor Nikonov
a3d4fd9246 Fix 02950_parallel_replicas_used_count 2024-07-17 10:01:26 +00:00
Igor Nikonov
fc693cc982 Fix 02982_parallel_replicas_unexpected_cluster 2024-07-16 21:05:27 +00:00
Igor Nikonov
5347754aed Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-16 20:03:36 +00:00
Igor Nikonov
08dc1c8c37 Fallback to local execution in case of cluster(shard) has only one node 2024-07-16 19:59:16 +00:00
Igor Nikonov
0056db42be Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-16 08:57:57 +00:00
Igor Nikonov
4caf9b6e6f Better diagnostic message formatting 2024-07-16 08:57:15 +00:00
Igor Nikonov
27db36cd4f Separate converting actions in separte source file 2024-07-16 08:46:24 +00:00
Igor Nikonov
69c1e68359 Fix clang-tidy warning: remove include duplicate 2024-07-16 07:48:40 +00:00
Igor Nikonov
65ba0dd669 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-15 20:42:25 +00:00
Igor Nikonov
860b18c251 Update test for in-(reverse)-order coordinator 2024-07-15 20:40:55 +00:00
Igor Nikonov
02a2f50916 Fix clang warning 2024-07-15 18:37:19 +00:00
Igor Nikonov
fdbafdb8db Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-15 12:04:04 +00:00
Igor Nikonov
261ff133b8 Fix 02784_parallel_replicas_automatic_decision_join 2024-07-15 12:03:12 +00:00
Igor Nikonov
f7befaf68e Polish + comments 2024-07-14 18:46:14 +00:00
Igor Nikonov
9f20e33d50 Fix 2024-07-14 17:52:51 +00:00
Igor Nikonov
c494096d33 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-14 16:39:06 +00:00
Igor Nikonov
0c986cca7c Fix in-order coordinator
+ register all replicas for parts in working set
2024-07-14 16:37:00 +00:00
Igor Nikonov
32dc3fe8d1 Simplify code 2024-07-13 21:19:14 +00:00
Igor Nikonov
f42fe31396 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-12 21:21:04 +00:00
Igor Nikonov
b952a11f35 Fix 00177_memory_bound_merging 2024-07-12 21:20:23 +00:00
Igor Nikonov
e22d6035fa Cleanup 2024-07-12 14:11:04 +00:00
Igor Nikonov
4e44ecf286 Fixate setting in 02967_parallel_replicas_joins_and_analyzer 2024-07-12 13:52:33 +00:00
Igor Nikonov
6b1a63792d Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-12 13:43:34 +00:00
Igor Nikonov
04c3661b0b Randomize parallel_replicas_local_plan 2024-07-12 13:42:25 +00:00
Igor Nikonov
1a8e1eb0fe Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-11 11:14:12 +00:00
Igor Nikonov
69652477f4 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-11 10:17:28 +00:00
Igor Nikonov
b51e1b27db Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-03 07:51:38 +00:00
Igor Nikonov
3b31590477 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-02 20:40:04 +00:00
Igor Nikonov
6124772cd8 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-07-02 15:09:40 +00:00
Igor Nikonov
105d39b09f Twick number of threads 2024-07-02 15:06:19 +00:00
Igor Nikonov
6b3750ff83 Clean up 2024-07-01 08:28:54 +00:00
Igor Nikonov
71b0ab94be Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-30 20:54:42 +00:00
Igor Nikonov
37fbf905dd Use only local snapshot for in order coordinator 2024-06-30 20:33:43 +00:00
Igor Nikonov
a471716a7f Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-28 22:18:07 +00:00
Igor Nikonov
ebcf455f4a Fix: progress bar for reading in order 2024-06-28 22:17:34 +00:00
Igor Nikonov
9b694e611c Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-28 14:02:28 +00:00
Igor Nikonov
23d7e0148f Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-27 20:31:35 +00:00
Igor Nikonov
372b948d34 Fix: progress bar, reading in order 2024-06-27 20:30:05 +00:00
Igor Nikonov
60153d428d Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-26 10:12:42 +00:00
Igor Nikonov
7c6293a747 Fix test_parallel_replicas_no_replicas 2024-06-26 10:12:09 +00:00
Igor Nikonov
3d35d31655 More cleanup 2024-06-25 16:11:04 +00:00
Igor Nikonov
64cfe1628f Cleanup ReadFromMergeTree.h 2024-06-25 15:21:44 +00:00
Igor Nikonov
0eff924475 Use parallel_replicas_local_plan in test 2024-06-25 15:11:02 +00:00
Igor Nikonov
510cb961a1 Fix 2024-06-25 14:47:38 +00:00
Igor Nikonov
318af3af95 Cleanup 2024-06-25 14:31:11 +00:00
Igor Nikonov
96b68cb920 Update setting in 02404_memory_bound_merging 2024-06-25 13:33:36 +00:00
Igor Nikonov
36793bb126 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-25 12:30:03 +00:00
Igor Nikonov
f62873b173 Fix 02404_memory_bound_merging 2024-06-25 12:29:21 +00:00
Igor Nikonov
afa4d7fd5c Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-24 14:32:05 +00:00
Igor Nikonov
5d91fd9717 Cleanup 2024-06-24 14:31:05 +00:00
Igor Nikonov
9658d37d49 Add parallel_replicas_local_plan to settings history 2024-06-19 11:27:50 +00:00
Igor Nikonov
dba6ea078f Setting parallel_replicas_local_plan 2024-06-18 19:47:07 +00:00
Igor Nikonov
4093a6bacc Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-18 13:05:16 +00:00
Igor Nikonov
7c82f45fb0 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-18 11:12:40 +00:00
Igor Nikonov
0ecefa6973 Better replicas notation in plan description 2024-06-18 11:07:15 +00:00
Igor Nikonov
366de07856 Remove prefer_localhost_replica 2024-06-18 10:39:50 +00:00
Igor Nikonov
c286188419 Update 00177_memory_bound_merging.reference 2024-06-18 10:39:13 +00:00
Igor Nikonov
81668cc290 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-17 12:25:13 +00:00
Igor Nikonov
c87bfe102e Fix: correct local replica exclusion
- fixes 02731_parallel_replicas_join_subquery
2024-06-17 12:24:14 +00:00
Igor Nikonov
70a33e633c Fix 02784_parallel_replicas_automatic_decision 2024-06-14 19:53:04 +00:00
Igor Nikonov
08032e97fd Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-14 14:10:54 +00:00
Igor Nikonov
c2d38b3c93 Fix crash 2024-06-14 14:06:12 +00:00
Igor Nikonov
245476b34b Consistent replica id assignment 2024-06-14 12:02:47 +00:00
Igor Nikonov
048cbb17a6 Comment to 02769_parallel_replicas_unavailable_shards.sql 2024-06-13 19:55:30 +00:00
Igor Nikonov
f8afb299f6 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-12 10:30:43 +00:00
Igor Nikonov
164b5e47a8 Fix 02769_parallel_replicas_unavailable_shards 2024-06-12 10:30:15 +00:00
Igor Nikonov
eccd56c9b9 Fix style check 2024-06-11 20:16:00 +00:00
Igor Nikonov
da4ed273e3 Fix 02811_parallel_replicas_prewhere_count 2024-06-11 19:24:50 +00:00
Igor Nikonov
f46deb4e79 Fix clang-tidy 2024-06-11 16:49:51 +00:00
Igor Nikonov
f999eed376 Fix 02967_parallel_replicas_join_algo_and_analyzer 2024-06-11 16:49:11 +00:00
Igor Nikonov
a473ecd004 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-11 14:38:21 +00:00
Igor Nikonov
d6dec38103 Fix test_parallel_replicas_over_distributed 2024-06-11 14:37:42 +00:00
Igor Nikonov
23ca8608ba Fix 03006_parallel_replicas_prewhere 2024-06-11 13:25:19 +00:00
Igor Nikonov
bb57e0bc9c Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-10 18:54:21 +00:00
Igor Nikonov
29d56b61b3 fix 02404_memory_bound_merging 2024-06-07 14:54:00 +00:00
Igor Nikonov
0999501308 Fix crash in in-order coordinator 2024-06-07 14:35:06 +00:00
Igor Nikonov
44fecf66ca Fix 02967_parallel_replicas_joins_and_analyzer 2024-06-06 17:11:42 +00:00
Igor Nikonov
7be90470d5 Fix 02731_parallel_replicas_join_subquery 2024-06-06 16:49:05 +00:00
Igor Nikonov
7c1ffaec95 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-06 16:21:56 +00:00
Igor Nikonov
4f37fafe42 Fix 02771_parallel_replicas_analyzer 2024-06-06 15:57:12 +00:00
Igor Nikonov
f8d4aabfe0 Initiliaze working set on pipelie initialization, right after analysis 2024-06-06 14:28:17 +00:00
Igor Nikonov
69009b886f Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-05 11:58:42 +00:00
Igor Nikonov
3210d0f471 Simple renaming 2024-06-05 11:56:12 +00:00
Igor Nikonov
f39dbac9a0 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-06-05 06:46:56 +00:00
Igor Nikonov
6476e9ad9b Temporary test fix 2024-06-05 06:45:56 +00:00
Igor Nikonov
eece76bc41 Fix build 2024-06-01 20:53:46 +00:00
Igor Nikonov
2bc236f635 Merge remote-tracking branch 'origin/pr-plan-rewrite' into pr-local-plan 2024-06-01 18:22:03 +00:00
Igor Nikonov
e8a1a800dc Fix crash with JOINs 2024-05-29 20:57:16 +00:00
Igor Nikonov
40aab93db1 Correct replica id for inorder case 2024-05-29 20:55:02 +00:00
Igor Nikonov
e28ca7ebe8 Merge remote-tracking branch 'origin/pr-plan-rewrite' into pr-local-plan 2024-05-29 10:52:59 +00:00
Igor Nikonov
d756729f38 Use local working set for parallel replicas 2024-05-28 21:04:33 +00:00
Igor Nikonov
a9b485a2c1 Disable temporary PR inorder test 2024-05-28 19:05:09 +00:00
Igor Nikonov
4916e3376e Merge remote-tracking branch 'origin/pr-plan-rewrite' into pr-local-plan 2024-05-28 15:13:30 +00:00
Igor Nikonov
0c1bd2e477 Merge remote-tracking branch 'origin/master' into pr-local-plan 2024-05-28 14:35:22 +00:00
Igor Nikonov
1aa5d70d2a Fix style 2024-05-28 13:03:24 +00:00
Igor Nikonov
29346f6073 Init coordinator separately 2024-05-28 12:16:18 +00:00
Igor Nikonov
5cdf8d336c Local reading step from merge tree 2024-05-27 12:22:18 +00:00
Igor Nikonov
be08ebd0f4 Fix 2024-05-24 13:01:30 +00:00
Igor Nikonov
fa8aafa942 Local plan for parallel replicas: save 2024-05-22 21:28:33 +00:00
277 changed files with 6603 additions and 1728 deletions


@@ -226,6 +226,13 @@ namespace Crypto
 error:
     if (pFile)
         fclose(pFile);
+    if (*ppKey)
+    {
+        if constexpr (std::is_same_v<K, EVP_PKEY>)
+            EVP_PKEY_free(*ppKey);
+        else
+            EC_KEY_free(*ppKey);
+    }
     throw OpenSSLException("EVPKey::loadKey(string)");
 }

@@ -287,6 +294,13 @@
 error:
     if (pBIO)
         BIO_free(pBIO);
+    if (*ppKey)
+    {
+        if constexpr (std::is_same_v<K, EVP_PKEY>)
+            EVP_PKEY_free(*ppKey);
+        else
+            EC_KEY_free(*ppKey);
+    }
     throw OpenSSLException("EVPKey::loadKey(stream)");
 }


@@ -481,6 +481,11 @@ if (ARCH_S390X)
 else()
     set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" )
 endif()
 
+# ^^ you might be confused how for different little endian platforms (x86, ARM) the same assembly files can be used.
+# These files are indeed assembly but they only contain data ('.long' directive), which makes them portable accross CPUs.
+# Only the endianness and the character set (ASCII, EBCDIC) makes a difference, also see
+# https://unicode-org.github.io/icu/userguide/icu_data/#sharing-icu-data-between-platforms, 'Sharing ICU Data Between Platforms')
+# (and as an experiment, try re-generating the data files on x86 vs. ARM, ... you'll get exactly the same files)
 set(ICUDATA_SOURCES
     "${ICUDATA_SOURCE_FILE}"
contrib/libarchive (vendored submodule, 2 lines changed)

@@ -1 +1 @@
-Subproject commit 0c21691b177fac5f4cceca2a1ff2ddfa5d60f51c
+Subproject commit 313aa1fa10b657de791e3202c168a6c833bc3543


@@ -157,7 +157,7 @@ if (TARGET ch_contrib::zlib)
 endif()
 
 if (TARGET ch_contrib::zstd)
-    target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1 HAVE_LIBZSTD_COMPRESSOR=1)
+    target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1 HAVE_ZSTD_compressStream=1)
     target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
 endif()


@@ -334,13 +334,16 @@ typedef uint64_t uintmax_t;
 /* #undef ARCHIVE_XATTR_LINUX */
 
 /* Version number of bsdcpio */
-#define BSDCPIO_VERSION_STRING "3.7.0"
+#define BSDCPIO_VERSION_STRING "3.7.4"
 
 /* Version number of bsdtar */
-#define BSDTAR_VERSION_STRING "3.7.0"
+#define BSDTAR_VERSION_STRING "3.7.4"
 
 /* Version number of bsdcat */
-#define BSDCAT_VERSION_STRING "3.7.0"
+#define BSDCAT_VERSION_STRING "3.7.4"
 
+/* Version number of bsdunzip */
+#define BSDUNZIP_VERSION_STRING "3.7.4"
+
 /* Define to 1 if you have the `acl_create_entry' function. */
 /* #undef HAVE_ACL_CREATE_ENTRY */

@@ -642,8 +645,8 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the `getgrnam_r' function. */
 #define HAVE_GETGRNAM_R 1
 
-/* Define to 1 if platform uses `optreset` to reset `getopt` */
-#define HAVE_GETOPT_OPTRESET 1
+/* Define to 1 if you have the `getline' function. */
+#define HAVE_GETLINE 1
 
 /* Define to 1 if you have the `getpid' function. */
 #define HAVE_GETPID 1

@@ -750,6 +753,12 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the `pcreposix' library (-lpcreposix). */
 /* #undef HAVE_LIBPCREPOSIX */
 
+/* Define to 1 if you have the `pcre2-8' library (-lpcre2-8). */
+/* #undef HAVE_LIBPCRE2 */
+
+/* Define to 1 if you have the `pcreposix' library (-lpcre2posix). */
+/* #undef HAVE_LIBPCRE2POSIX */
+
 /* Define to 1 if you have the `xml2' library (-lxml2). */
 #define HAVE_LIBXML2 1

@@ -765,9 +774,8 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the `zstd' library (-lzstd). */
 /* #undef HAVE_LIBZSTD */
 
-/* Define to 1 if you have the `zstd' library (-lzstd) with compression
-   support. */
-/* #undef HAVE_LIBZSTD_COMPRESSOR */
+/* Define to 1 if you have the ZSTD_compressStream function. */
+/* #undef HAVE_ZSTD_compressStream */
 
 /* Define to 1 if you have the <limits.h> header file. */
 #define HAVE_LIMITS_H 1

@@ -923,6 +931,9 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the <pcreposix.h> header file. */
 /* #undef HAVE_PCREPOSIX_H */
 
+/* Define to 1 if you have the <pcre2posix.h> header file. */
+/* #undef HAVE_PCRE2POSIX_H */
+
 /* Define to 1 if you have the `pipe' function. */
 #define HAVE_PIPE 1

@@ -1029,6 +1040,12 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the `strrchr' function. */
 #define HAVE_STRRCHR 1
 
+/* Define to 1 if the system has the type `struct statfs'. */
+/* #undef HAVE_STRUCT_STATFS */
+
+/* Define to 1 if `f_iosize' is a member of `struct statfs'. */
+/* #undef HAVE_STRUCT_STATFS_F_IOSIZE */
+
 /* Define to 1 if `f_namemax' is a member of `struct statfs'. */
 /* #undef HAVE_STRUCT_STATFS_F_NAMEMAX */

@@ -1077,6 +1094,9 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the `symlink' function. */
 #define HAVE_SYMLINK 1
 
+/* Define to 1 if you have the `sysconf' function. */
+#define HAVE_SYSCONF 1
+
 /* Define to 1 if you have the <sys/acl.h> header file. */
 /* #undef HAVE_SYS_ACL_H */

@@ -1276,10 +1296,10 @@ typedef uint64_t uintmax_t;
 #define ICONV_CONST
 
 /* Version number of libarchive as a single integer */
-#define LIBARCHIVE_VERSION_NUMBER "3007000"
+#define LIBARCHIVE_VERSION_NUMBER "3007004"
 
 /* Version number of libarchive */
-#define LIBARCHIVE_VERSION_STRING "3.7.0"
+#define LIBARCHIVE_VERSION_STRING "3.7.4"
 
 /* Define to 1 if `lstat' dereferences a symlink specified with a trailing
    slash. */

@@ -1333,7 +1353,7 @@ typedef uint64_t uintmax_t;
 #endif /* SAFE_TO_DEFINE_EXTENSIONS */
 
 /* Version number of package */
-#define VERSION "3.7.0"
+#define VERSION "3.7.4"
 
 /* Number of bits in a file offset, on hosts where this is settable. */
 /* #undef _FILE_OFFSET_BITS */


@@ -109,7 +109,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
             <networks>
                 <ip>::/0</ip>
             </networks>
-            <password>${CLICKHOUSE_PASSWORD}</password>
+            <password><![CDATA[${CLICKHOUSE_PASSWORD//]]>/]]]]><![CDATA[>}]]></password>
             <quota>default</quota>
             <access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
         </${CLICKHOUSE_USER}>


@@ -124,6 +124,8 @@ function setup_logs_replication
     check_logs_credentials || return 0
     __set_connection_args
 
+    echo "My hostname is ${HOSTNAME}"
+
     echo 'Create all configured system logs'
     clickhouse-client --query "SYSTEM FLUSH LOGS"

@@ -184,7 +186,17 @@ function setup_logs_replication
             /^TTL /d
         ')
 
-        echo -e "Creating remote destination table ${table}_${hash} with statement:\n${statement}" >&2
+        echo -e "Creating remote destination table ${table}_${hash} with statement:" >&2
+
+        echo "::group::${table}"
+        # there's the only way big "$statement" can be printed without causing EAGAIN error
+        # cat: write error: Resource temporarily unavailable
+        statement_print="${statement}"
+        if [ "${#statement_print}" -gt 4000 ]; then
+            statement_print="${statement::1999}\n…\n${statement:${#statement}-1999}"
+        fi
+        echo -e "$statement_print"
+        echo "::endgroup::"
 
         echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \
             --distributed_ddl_task_timeout=30 --distributed_ddl_output_mode=throw_only_active \

docs/en/engines/table-engines/integrations/azure-queue.md (new file)

@@ -0,0 +1,72 @@
---
slug: /en/engines/table-engines/integrations/azure-queue
sidebar_position: 181
sidebar_label: AzureQueue
---

# AzureQueue Table Engine

This engine provides an integration with [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) ecosystem, allowing streaming data import.

## Create Table {#creating-a-table}

``` sql
CREATE TABLE test (name String, value UInt32)
    ENGINE = AzureQueue(...)
    [SETTINGS]
    [mode = '',]
    [after_processing = 'keep',]
    [keeper_path = '',]
    ...
```

**Engine parameters**

`AzureQueue` parameters are the same as `AzureBlobStorage` table engine supports. See parameters section [here](../../../engines/table-engines/integrations/azureBlobStorage.md).

**Example**

```sql
CREATE TABLE azure_queue_engine_table (name String, value UInt32)
ENGINE=AzureQueue('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/data/')
SETTINGS
    mode = 'unordered'
```

## Settings {#settings}

The set of supported settings is the same as for `S3Queue` table engine, but without `s3queue_` prefix. See [full list of settings settings](../../../engines/table-engines/integrations/s3queue.md#settings).

## Description {#description}

`SELECT` is not particularly useful for streaming import (except for debugging), because each file can be imported only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this:

1. Use the engine to create a table for consuming from specified path in S3 and consider it a data stream.
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.

When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background.

Example:

``` sql
CREATE TABLE azure_queue_engine_table (name String, value UInt32)
ENGINE=AzureQueue('<endpoint>', 'CSV', 'gzip')
SETTINGS
    mode = 'unordered';

CREATE TABLE stats (name String, value UInt32)
ENGINE = MergeTree() ORDER BY name;

CREATE MATERIALIZED VIEW consumer TO stats
AS SELECT name, value FROM azure_queue_engine_table;

SELECT * FROM stats ORDER BY name;
```

## Virtual columns {#virtual-columns}

- `_path` — Path to the file.
- `_file` — Name of the file.

For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
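For illustration, a minimal sketch of reading these virtual columns from the `azure_queue_engine_table` defined above (for debugging only, since a plain `SELECT` consumes each file once; `_path` and `_file` come from the engine, not the table schema):

```sql
-- Inspect which blob each consumed row came from.
SELECT name, value, _path, _file
FROM azure_queue_engine_table
LIMIT 5;
```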

docs/en/engines/table-engines/integrations/s3.md

@@ -35,7 +35,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
     [SETTINGS ...]
 ```
 
-### Engine parameters
+### Engine parameters {#parameters}
 
 - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
 - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.

docs/en/engines/table-engines/integrations/s3queue.md

@ -5,6 +5,7 @@ sidebar_label: S3Queue
---
# S3Queue Table Engine
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming import. This engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features.
## Create Table {#creating-a-table}
@ -16,27 +17,25 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
[mode = '',]
[after_processing = 'keep',]
[keeper_path = '',]
[s3queue_loading_retries = 0,]
[s3queue_processing_threads_num = 1,]
[s3queue_enable_logging_to_s3queue_log = 0,]
[s3queue_polling_min_timeout_ms = 1000,]
[s3queue_polling_max_timeout_ms = 10000,]
[s3queue_polling_backoff_ms = 0,]
[s3queue_tracked_file_ttl_sec = 0,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_cleanup_interval_min_ms = 10000,]
[s3queue_cleanup_interval_max_ms = 30000,]
[loading_retries = 0,]
[processing_threads_num = 1,]
[enable_logging_to_s3queue_log = 0,]
[polling_min_timeout_ms = 1000,]
[polling_max_timeout_ms = 10000,]
[polling_backoff_ms = 0,]
[tracked_file_ttl_sec = 0,]
[tracked_files_limit = 1000,]
[cleanup_interval_min_ms = 10000,]
[cleanup_interval_max_ms = 30000,]
```
Starting with `24.7`, settings without the `s3queue_` prefix are also supported.
:::warning
Before `24.7`, the `s3queue_` prefix is required for all settings apart from `mode`, `after_processing` and `keeper_path`.
:::
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `NOSIGN` - If this keyword is provided in place of credentials, none of the requests will be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. The parameter is optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. The parameter is optional. By default, compression is autodetected from the file extension.
`S3Queue` parameters are the same as those supported by the `S3` table engine. See the parameters section [here](../../../engines/table-engines/integrations/s3.md#parameters).
**Example**
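A minimal sketch (the bucket URL is a placeholder):
```sql
CREATE TABLE s3_queue_engine_table (name String, value UInt32)
ENGINE = S3Queue('https://<bucket>.s3.amazonaws.com/data/*', 'CSV', 'gzip')
SETTINGS
    mode = 'unordered';
```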

View File

@ -1396,6 +1396,7 @@ SELECT * FROM json_each_row_nested
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`.
- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`.
- [input_format_json_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_json_empty_as_default) - treat empty fields in JSON input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.

View File

@ -233,6 +233,16 @@ Features:
- Useful tools: Zookeeper data exploration, query EXPLAIN, kill queries, etc.
- Visualization metric charts: queries and resource usage, number of merges/mutations, merge performance, query performance, etc.
### CKibana {#ckibana}
[CKibana](https://github.com/TongchengOpenSource/ckibana) is a lightweight service that allows you to effortlessly search, explore, and visualize ClickHouse data using the native Kibana UI.
Features:
- Translates chart requests from the native Kibana UI into ClickHouse query syntax.
- Supports advanced features such as sampling and caching to enhance query performance.
- Minimizes the learning cost for users after migrating from ElasticSearch to ClickHouse.
## Commercial {#commercial}
### DataGrip {#datagrip}

View File

@ -6,7 +6,7 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m
<SelfManaged />
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation makes it possible to uniquely authenticate an incoming connection. The `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. The `subjectAltName extension` supports the usage of one wildcard '*' in the server configuration. This makes it possible to associate multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration.
To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml`:
@ -30,6 +30,12 @@ To enable SSL certificate authentication, a list of `Common Name`'s or `Subject
</ssl_certificates>
<!-- Other settings -->
</user_name_2>
<user_name_3>
<ssl_certificates>
<!-- Wildcard support -->
<subject_alt_name>URI:spiffe://foo.com/*/bar</subject_alt_name>
</ssl_certificates>
</user_name_3>
</users>
</clickhouse>
```

View File

@ -752,6 +752,17 @@ Possible values:
Default value: 0.
### input_format_json_empty_as_default {#input_format_json_empty_as_default}
When enabled, empty input fields in JSON are replaced with default values. For complex default expressions, `input_format_defaults_for_omitted_fields` must be enabled too.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
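A quick illustration (a sketch; the table name is hypothetical):
```sql
CREATE TABLE test_json_defaults (x UInt32, s String) ENGINE = Memory;

SET input_format_json_empty_as_default = 1,
    input_format_defaults_for_omitted_fields = 1;

-- The empty value for `x` is replaced with the column's default (0 for UInt32)
INSERT INTO test_json_defaults FORMAT JSONEachRow {"x": "", "s": "hello"};

SELECT * FROM test_json_defaults; -- returns 0, 'hello'
```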
## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}

View File

@ -47,13 +47,15 @@ keeper foo bar
- `ls '[path]'` -- Lists the nodes for the given path (default: cwd)
- `cd '[path]'` -- Changes the working path (default `.`)
- `cp '<src>' '<dest>'` -- Copies 'src' node to 'dest' path
- `mv '<src>' '<dest>'` -- Moves 'src' node to the 'dest' path
- `exists '<path>'` -- Returns `1` if node exists, `0` otherwise
- `set '<path>' <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
- `create '<path>' <value> [mode]` -- Creates new node with the set value
- `touch '<path>'` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
- `get '<path>'` -- Returns the node's value
- `rm '<path>' [version]` -- Removes the node only if version matches (default: -1)
- `rmr '<path>'` -- Recursively deletes path. Confirmation required
- `rmr '<path>' [limit]` -- Recursively deletes path if the subtree size is smaller than the limit. Confirmation required (default limit = 100)
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
- `get_direct_children_number '[path]'` -- Gets the number of direct children nodes under a specific path

View File

@ -1636,37 +1636,329 @@ SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), INTERVAL 1 MINUTE, t
Converts a date with time to a certain fixed date, while preserving the time.
**Syntax**
```sql
toTime(date[,timezone])
```
**Arguments**
- `date` — Date to convert to a time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
- `timezone` (optional) — Timezone for the returned value. [String](../data-types/string.md).
**Returned value**
- DateTime with date equated to `1970-01-02` while preserving the time. [DateTime](../data-types/datetime.md).
:::note
If the `date` input argument contains sub-second components,
they are dropped in the returned `DateTime` value, which has second accuracy.
:::
**Example**
Query:
```sql
SELECT toTime(toDateTime64('1970-12-10 01:20:30.3000',3)) AS result, toTypeName(result);
```
Result:
```response
┌──────────────result─┬─toTypeName(result)─┐
│ 1970-01-02 01:20:30 │ DateTime │
└─────────────────────┴────────────────────┘
```
## toRelativeYearNum
Converts a date, or date with time, to the number of the year, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of years elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeYearNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of years from a fixed reference point in the past. [UInt16](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeYearNum(toDate('2002-12-08')) AS y1,
toRelativeYearNum(toDate('2010-10-26')) AS y2
```
Result:
```response
┌───y1─┬───y2─┐
│ 2002 │ 2010 │
└──────┴──────┘
```
## toRelativeQuarterNum
Converts a date, or date with time, to the number of the quarter, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of quarters elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeQuarterNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of quarters from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeQuarterNum(toDate('1993-11-25')) AS q1,
toRelativeQuarterNum(toDate('2005-01-05')) AS q2
```
Result:
```response
┌───q1─┬───q2─┐
│ 7975 │ 8020 │
└──────┴──────┘
```
## toRelativeMonthNum
Converts a date, or date with time, to the number of the month, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of months elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeMonthNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of months from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeMonthNum(toDate('2001-04-25')) AS m1,
toRelativeMonthNum(toDate('2009-07-08')) AS m2
```
Result:
```response
┌────m1─┬────m2─┐
│ 24016 │ 24115 │
└───────┴───────┘
```
## toRelativeWeekNum
Converts a date, or date with time, to the number of the week, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of weeks elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeWeekNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of weeks from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeWeekNum(toDate('2000-02-29')) AS w1,
toRelativeWeekNum(toDate('2001-01-12')) AS w2
```
Result:
```response
┌───w1─┬───w2─┐
│ 1574 │ 1619 │
└──────┴──────┘
```
## toRelativeDayNum
Converts a date, or date with time, to the number of the day, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of days elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeDayNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of days from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeDayNum(toDate('1993-10-05')) AS d1,
toRelativeDayNum(toDate('2000-09-20')) AS d2
```
Result:
```response
┌───d1─┬────d2─┐
│ 8678 │ 11220 │
└──────┴───────┘
```
## toRelativeHourNum
Converts a date, or date with time, to the number of the hour, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of hours elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeHourNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of hours from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeHourNum(toDateTime('1993-10-05 05:20:36')) AS h1,
toRelativeHourNum(toDateTime('2000-09-20 14:11:29')) AS h2
```
Result:
```response
┌─────h1─┬─────h2─┐
│ 208276 │ 269292 │
└────────┴────────┘
```
## toRelativeMinuteNum
Converts a date, or date with time, to the number of the minute, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of minutes elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeMinuteNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of minutes from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeMinuteNum(toDateTime('1993-10-05 05:20:36')) AS m1,
toRelativeMinuteNum(toDateTime('2000-09-20 14:11:29')) AS m2
```
Result:
```response
┌───────m1─┬───────m2─┐
│ 12496580 │ 16157531 │
└──────────┴──────────┘
```
## toRelativeSecondNum
Converts a date, or date with time, to the number of the second, starting from a certain fixed point in the past.
Converts a date, or date with time, to the number of seconds elapsed since a certain fixed point in the past.
**Syntax**
```sql
toRelativeSecondNum(date)
```
**Arguments**
- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md).
**Returned value**
- The number of seconds from a fixed reference point in the past. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
toRelativeSecondNum(toDateTime('1993-10-05 05:20:36')) AS s1,
toRelativeSecondNum(toDateTime('2000-09-20 14:11:29')) AS s2
```
Result:
```response
┌────────s1─┬────────s2─┐
│ 749794836 │ 969451889 │
└───────────┴───────────┘
```
## toISOYear

View File

@ -3906,7 +3906,7 @@ Result:
## toDateTime64
Converts the argument to the [DateTime64](../data-types/datetime64.md) data type.
Converts an input value to a value of type [DateTime64](../data-types/datetime64.md).
**Syntax**
@ -3918,7 +3918,7 @@ toDateTime64(expr, scale, [timezone])
- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md).
- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ].
- `timezone` - Time zone of the specified datetime64 object.
- `timezone` (optional) - Time zone of the specified datetime64 object.
**Returned value**
@ -3977,10 +3977,137 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN
## toDateTime64OrZero
Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md) but returns the min value of [DateTime64](../data-types/datetime64.md) if an invalid argument is received.
**Syntax**
``` sql
toDateTime64OrZero(expr, scale, [timezone])
```
**Arguments**
- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md).
- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ].
- `timezone` (optional) - Time zone of the specified DateTime64 object.
**Returned value**
- A calendar date and time of day, with sub-second precision, otherwise the minimum value of `DateTime64`: `1970-01-01 01:00:00.000`. [DateTime64](../data-types/datetime64.md).
**Example**
Query:
```sql
SELECT toDateTime64OrZero('2008-10-12 00:00:00 00:30:30', 3) AS invalid_arg
```
Result:
```response
┌─────────────invalid_arg─┐
│ 1970-01-01 01:00:00.000 │
└─────────────────────────┘
```
**See also**
- [toDateTime64](#todatetime64).
- [toDateTime64OrNull](#todatetime64ornull).
- [toDateTime64OrDefault](#todatetime64ordefault).
## toDateTime64OrNull
Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md) but returns `NULL` if an invalid argument is received.
**Syntax**
``` sql
toDateTime64OrNull(expr, scale, [timezone])
```
**Arguments**
- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md).
- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ].
- `timezone` (optional) - Time zone of the specified DateTime64 object.
**Returned value**
- A calendar date and time of day, with sub-second precision, otherwise `NULL`. [DateTime64](../data-types/datetime64.md)/[NULL](../data-types/nullable.md).
**Example**
Query:
```sql
SELECT
toDateTime64OrNull('1976-10-18 00:00:00.30', 3) AS valid_arg,
toDateTime64OrNull('1976-10-18 00:00:00 30', 3) AS invalid_arg
```
Result:
```response
┌───────────────valid_arg─┬─invalid_arg─┐
│ 1976-10-18 00:00:00.300 │ ᴺᵁᴸᴸ │
└─────────────────────────┴─────────────┘
```
**See also**
- [toDateTime64](#todatetime64).
- [toDateTime64OrZero](#todatetime64orzero).
- [toDateTime64OrDefault](#todatetime64ordefault).
## toDateTime64OrDefault
Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md),
but returns either the minimum value of [DateTime64](../data-types/datetime64.md)
or the provided `default` if an invalid argument is received.
**Syntax**
``` sql
toDateTime64OrDefault(expr, scale, [timezone, default])
```
**Arguments**
- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md).
- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ].
- `timezone` (optional) - Time zone of the specified DateTime64 object.
- `default` (optional) - Default value to return if an invalid argument is received. [DateTime64](../data-types/datetime64.md).
**Returned value**
- A calendar date and time of day, with sub-second precision, otherwise the minimum value of `DateTime64` or the `default` value if provided. [DateTime64](../data-types/datetime64.md).
**Example**
Query:
```sql
SELECT
toDateTime64OrDefault('1976-10-18 00:00:00 30', 3) AS invalid_arg,
toDateTime64OrDefault('1976-10-18 00:00:00 30', 3, 'UTC', toDateTime64('2001-01-01 00:00:00.00',3)) AS invalid_arg_with_default
```
Result:
```response
┌─────────────invalid_arg─┬─invalid_arg_with_default─┐
│ 1970-01-01 01:00:00.000 │ 2000-12-31 23:00:00.000 │
└─────────────────────────┴──────────────────────────┘
```
**See also**
- [toDateTime64](#todatetime64).
- [toDateTime64OrZero](#todatetime64orzero).
- [toDateTime64OrNull](#todatetime64ornull).
## toDecimal32
Converts an input value to a value of type [`Decimal(9, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error.

View File

@ -506,14 +506,23 @@ bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & nod
return false;
node->args.push_back(std::move(path));
ASTPtr remove_nodes_limit;
if (ParserUnsignedInteger{}.parse(pos, remove_nodes_limit, expected))
node->args.push_back(remove_nodes_limit->as<ASTLiteral &>().value);
else
node->args.push_back(UInt64(100));
return true;
}
void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
String path = client->getAbsolutePath(query->args[0].safeGet<String>());
UInt64 remove_nodes_limit = query->args[1].safeGet<UInt64>();
client->askConfirmation(
"You are going to recursively delete path " + path, [client, path] { client->zookeeper->removeRecursive(path); });
"You are going to recursively delete path " + path,
[client, path, remove_nodes_limit] { client->zookeeper->removeRecursive(path, static_cast<UInt32>(remove_nodes_limit)); });
}
bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const

View File

@ -184,7 +184,7 @@ class RMRCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
String getHelpMessage() const override { return "{} <path> [limit] -- Recursively deletes path if the subtree size is smaller than the limit. Confirmation required (default limit = 100)"; }
};
class ReconfigCommand : public IKeeperClientCommand

View File

@ -11,6 +11,7 @@
#include <Core/ServerUUID.h>
#include <Common/logger_useful.h>
#include <Common/CgroupsMemoryUsageObserver.h>
#include <Common/MemoryWorker.h>
#include <Common/ErrorHandlers.h>
#include <Common/assertProcessUserMatchesDataOwner.h>
#include <Common/makeSocketAddress.h>
@ -384,6 +385,9 @@ try
LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds());
});
MemoryWorker memory_worker(config().getUInt64("memory_worker_period_ms", 0));
memory_worker.start();
static ServerErrorHandler error_handler;
Poco::ErrorHandler::set(&error_handler);
@ -425,8 +429,9 @@ try
for (const auto & server : *servers)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads(), server.refusedConnections()});
return metrics;
}
);
},
/*update_jemalloc_epoch_=*/memory_worker.getSource() != MemoryWorker::MemoryUsageSource::Jemalloc,
/*update_rss_=*/memory_worker.getSource() == MemoryWorker::MemoryUsageSource::None);
std::vector<std::string> listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host");
@ -655,7 +660,6 @@ try
GWPAsan::initFinished();
#endif
LOG_INFO(log, "Ready for connections.");
waitForTerminationRequest();

View File

@ -11,7 +11,6 @@
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Environment.h>
#include <Poco/Config.h>
#include <Common/Jemalloc.h>
#include <Common/scope_guard_safe.h>
#include <Common/logger_useful.h>
#include <base/phdr_cache.h>
@ -25,6 +24,7 @@
#include <base/Numa.h>
#include <Common/PoolId.h>
#include <Common/MemoryTracker.h>
#include <Common/MemoryWorker.h>
#include <Common/ClickHouseRevision.h>
#include <Common/DNSResolver.h>
#include <Common/CgroupsMemoryUsageObserver.h>
@ -111,6 +111,8 @@
#include <filesystem>
#include <unordered_set>
#include <Common/Jemalloc.h>
#include "config.h"
#include <Common/config_version.h>
@ -449,9 +451,12 @@ void checkForUsersNotInMainConfig(
}
}
namespace
{
/// Unused in other builds
#if defined(OS_LINUX)
static String readLine(const String & path)
String readLine(const String & path)
{
ReadBufferFromFile in(path);
String contents;
@ -459,7 +464,7 @@ static String readLine(const String & path)
return contents;
}
static int readNumber(const String & path)
int readNumber(const String & path)
{
ReadBufferFromFile in(path);
int result;
@ -469,7 +474,7 @@ static int readNumber(const String & path)
#endif
static void sanityChecks(Server & server)
void sanityChecks(Server & server)
{
std::string data_path = getCanonicalPath(server.config().getString("path", DBMS_DEFAULT_PATH));
std::string logs_path = server.config().getString("logger.log", "");
@ -590,6 +595,8 @@ static void sanityChecks(Server & server)
}
}
}
void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log)
{
try
@ -906,6 +913,8 @@ try
LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds());
});
MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms);
/// This object will periodically calculate some metrics.
ServerAsynchronousMetrics async_metrics(
global_context,
@ -924,8 +933,9 @@ try
for (const auto & server : servers)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads(), server.refusedConnections()});
return metrics;
}
);
},
/*update_jemalloc_epoch_=*/memory_worker.getSource() != MemoryWorker::MemoryUsageSource::Jemalloc,
/*update_rss_=*/memory_worker.getSource() == MemoryWorker::MemoryUsageSource::None);
/// NOTE: global context should be destroyed *before* GlobalThreadPool::shutdown()
/// Otherwise GlobalThreadPool::shutdown() will hang, since Context holds some threads.
@ -1204,6 +1214,8 @@ try
FailPointInjection::enableFromGlobalConfig(config());
memory_worker.start();
int default_oom_score = 0;
#if !defined(NDEBUG)
@ -1547,15 +1559,6 @@ try
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
if (cgroups_memory_usage_observer)
{
double hard_limit_ratio = new_server_settings.cgroup_memory_watcher_hard_limit_ratio;
double soft_limit_ratio = new_server_settings.cgroup_memory_watcher_soft_limit_ratio;
cgroups_memory_usage_observer->setMemoryUsageLimits(
static_cast<uint64_t>(max_server_memory_usage * hard_limit_ratio),
static_cast<uint64_t>(max_server_memory_usage * soft_limit_ratio));
}
size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit;
size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
@ -1584,8 +1587,6 @@ try
background_memory_tracker.setDescription("(background)");
background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking);
total_memory_tracker.setAllowUseJemallocMemory(new_server_settings.allow_use_jemalloc_memory);
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);

View File

@ -239,15 +239,36 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
case AuthenticationType::SSL_CERTIFICATE:
{
for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
{
for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type))
{
if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject))
return true;
// Wildcard support (only a single '*' is supported)
if (subject.contains('*'))
{
auto prefix = std::string_view(subject).substr(0, subject.find('*'));
auto suffix = std::string_view(subject).substr(subject.find('*') + 1);
auto slashes = std::count(subject.begin(), subject.end(), '/');
for (const auto & certificate_subject : ssl_certificate_credentials->getSSLCertificateSubjects().at(type))
{
bool matches_wildcard = certificate_subject.starts_with(prefix) && certificate_subject.ends_with(suffix);
// '*' must not represent a '/' in a URI, so check that the numbers of '/' are equal
bool matches_slashes = slashes == count(certificate_subject.begin(), certificate_subject.end(), '/');
if (matches_wildcard && matches_slashes)
return true;
}
}
}
}
return false;
}
case AuthenticationType::SSH_KEY:
#if USE_SSH

View File

@ -116,15 +116,17 @@ class GroupConcatImpl final
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
const DataTypePtr type;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, serialization(this->argument_types[0]->getDefaultSerialization())
, limit(limit_)
, delimiter(delimiter_)
, type(data_type_)
{
serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization();
}
String getName() const override { return name; }
@ -140,6 +142,13 @@ public:
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
if (isFixedString(type))
{
ColumnWithTypeAndName col = {columns[0]->getPtr(), type, "column"};
const auto & col_str = castColumn(col, std::make_shared<DataTypeString>());
cur_data.insert(col_str.get(), serialization, row_num, arena);
}
else
cur_data.insert(columns[0], serialization, row_num, arena);
}

View File

@ -176,7 +176,7 @@ add_library (clickhouse_new_delete STATIC Common/new_delete.cpp)
target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io)
if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::jemalloc)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()

View File

@ -168,7 +168,7 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
{ return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, /*async_callback=*/ {}); };
return getManyImpl(settings, pool_mode, try_get_entry,
/*skip_unavailable_endpoints=*/ std::nullopt,
/*skip_unavailable_endpoints=*/ false, /// skip_unavailable_endpoints is used to get the min number of entries, and we need at least one
/*priority_func=*/ {},
settings.distributed_insert_skip_read_only_replicas);
}

View File

@ -42,7 +42,7 @@ public:
size_t max_error_cap = DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT);
using Entry = IConnectionPool::Entry;
using PoolWithFailoverBase<IConnectionPool>::checkTryResultIsValid;
using PoolWithFailoverBase<IConnectionPool>::getValidTryResult;
/** Allocates connection to work. */
Entry get(const ConnectionTimeouts & timeouts) override;
@ -98,7 +98,7 @@ public:
std::vector<Base::ShuffledPool> getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {}, bool use_slowdown_count = false);
size_t getMaxErrorCup() const { return Base::max_error_cap; }
size_t getMaxErrorCap() const { return Base::max_error_cap; }
void updateSharedError(std::vector<ShuffledPool> & shuffled_pools)
{

View File

@ -327,7 +327,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect
ShuffledPool & shuffled_pool = shuffled_pools[index];
LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1);
shuffled_pool.error_count = std::min(pool->getMaxErrorCap(), shuffled_pool.error_count + 1);
shuffled_pool.slowdown_count = 0;
if (shuffled_pool.error_count >= max_tries)

View File

@ -1,5 +1,3 @@
#include <Common/AsynchronousMetrics.h>
#include <IO/MMappedFileCache.h>
#include <IO/ReadHelpers.h>
#include <IO/UncompressedCache.h>
@ -8,8 +6,10 @@
#include <base/find_symbols.h>
#include <base/getPageSize.h>
#include <sys/resource.h>
#include <Common/AsynchronousMetrics.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/Jemalloc.h>
#include <Common/filesystemHelpers.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
@ -69,10 +69,14 @@ static void openCgroupv2MetricFile(const std::string & filename, std::optional<R
AsynchronousMetrics::AsynchronousMetrics(
unsigned update_period_seconds,
const ProtocolServerMetricsFunc & protocol_server_metrics_func_)
const ProtocolServerMetricsFunc & protocol_server_metrics_func_,
bool update_jemalloc_epoch_,
bool update_rss_)
: update_period(update_period_seconds)
, log(getLogger("AsynchronousMetrics"))
, protocol_server_metrics_func(protocol_server_metrics_func_)
, update_jemalloc_epoch(update_jemalloc_epoch_)
, update_rss(update_rss_)
{
#if defined(OS_LINUX)
openFileIfExists("/proc/cpuinfo", cpuinfo);
@ -411,9 +415,7 @@ Value saveJemallocMetricImpl(
const std::string & jemalloc_full_name,
const std::string & clickhouse_full_name)
{
Value value{};
size_t size = sizeof(value);
mallctl(jemalloc_full_name.c_str(), &value, &size, nullptr, 0);
auto value = getJemallocValue<Value>(jemalloc_full_name.c_str());
values[clickhouse_full_name] = AsynchronousMetricValue(value, "An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html");
return value;
}
@ -768,8 +770,11 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
// 'epoch' is a special mallctl -- it updates the statistics. Without it, all
// the following calls will return stale values. It increments and returns
// the current epoch number, which might be useful to log as a sanity check.
auto epoch = updateJemallocEpoch();
new_values["jemalloc.epoch"] = { epoch, "An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other `jemalloc` metrics." };
auto epoch = update_jemalloc_epoch ? updateJemallocEpoch() : getJemallocValue<uint64_t>("epoch");
new_values["jemalloc.epoch"]
= {epoch,
"An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other "
"`jemalloc` metrics."};
// Collect the statistics themselves.
saveJemallocMetric<size_t>(new_values, "allocated");
@ -782,10 +787,10 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
saveJemallocMetric<size_t>(new_values, "background_thread.num_threads");
saveJemallocMetric<uint64_t>(new_values, "background_thread.num_runs");
saveJemallocMetric<uint64_t>(new_values, "background_thread.run_intervals");
saveJemallocProf<size_t>(new_values, "active");
saveJemallocProf<bool>(new_values, "active");
saveAllArenasMetric<size_t>(new_values, "pactive");
[[maybe_unused]] size_t je_malloc_pdirty = saveAllArenasMetric<size_t>(new_values, "pdirty");
[[maybe_unused]] size_t je_malloc_pmuzzy = saveAllArenasMetric<size_t>(new_values, "pmuzzy");
saveAllArenasMetric<size_t>(new_values, "pdirty");
saveAllArenasMetric<size_t>(new_values, "pmuzzy");
saveAllArenasMetric<size_t>(new_values, "dirty_purged");
saveAllArenasMetric<size_t>(new_values, "muzzy_purged");
#endif
@ -814,41 +819,8 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
" It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call."
" This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring."};
/// We must update the value of total_memory_tracker periodically.
/// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount.
/// See https://github.com/ClickHouse/ClickHouse/issues/10293
{
Int64 amount = total_memory_tracker.get();
Int64 peak = total_memory_tracker.getPeak();
Int64 rss = data.resident;
Int64 free_memory_in_allocator_arenas = 0;
#if USE_JEMALLOC
/// According to jemalloc man, pdirty is:
///
/// Number of pages within unused extents that are potentially
/// dirty, and for which madvise() or similar has not been called.
///
/// So they will be subtracted from RSS to make accounting more
/// accurate, since those pages are not really RSS but memory
/// that can be used at any time via jemalloc.
free_memory_in_allocator_arenas = je_malloc_pdirty * getPageSize();
#endif
Int64 difference = rss - amount;
/// Log only if difference is high. This is for convenience. The threshold is arbitrary.
if (difference >= 1048576 || difference <= -1048576)
LOG_TRACE(log,
"MemoryTracking: was {}, peak {}, free memory in arenas {}, will set to {} (RSS), difference: {}",
ReadableSize(amount),
ReadableSize(peak),
ReadableSize(free_memory_in_allocator_arenas),
ReadableSize(rss),
ReadableSize(difference));
MemoryTracker::setRSS(rss, free_memory_in_allocator_arenas);
}
if (update_rss)
MemoryTracker::updateRSS(data.resident);
}
{

View File

@ -1,15 +1,14 @@
#pragma once
#include <Common/CgroupsMemoryUsageObserver.h>
#include <Common/MemoryStatisticsOS.h>
#include <Common/ThreadPool.h>
#include <Common/Stopwatch.h>
#include <IO/ReadBufferFromFile.h>
#include <condition_variable>
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include <optional>
#include <unordered_map>
@ -69,7 +68,9 @@ public:
AsynchronousMetrics(
unsigned update_period_seconds,
const ProtocolServerMetricsFunc & protocol_server_metrics_func_);
const ProtocolServerMetricsFunc & protocol_server_metrics_func_,
bool update_jemalloc_epoch_,
bool update_rss_);
virtual ~AsynchronousMetrics();
@ -112,6 +113,9 @@ private:
MemoryStatisticsOS memory_stat TSA_GUARDED_BY(data_mutex);
#endif
[[maybe_unused]] const bool update_jemalloc_epoch;
[[maybe_unused]] const bool update_rss;
#if defined(OS_LINUX)
std::optional<ReadBufferFromFilePRead> meminfo TSA_GUARDED_BY(data_mutex);
std::optional<ReadBufferFromFilePRead> loadavg TSA_GUARDED_BY(data_mutex);

View File

@ -14,239 +14,21 @@
#include <fmt/ranges.h>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <optional>
#include "config.h"
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
#endif
using namespace DB;
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int INCORRECT_DATA;
}
}
namespace
{
/// Format is
/// kernel 5
/// rss 15
/// [...]
using Metrics = std::map<std::string, uint64_t>;
Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf)
{
Metrics metrics;
while (!buf.eof())
{
std::string current_key;
readStringUntilWhitespace(current_key, buf);
assertChar(' ', buf);
uint64_t value = 0;
readIntText(value, buf);
assertChar('\n', buf);
auto [_, inserted] = metrics.emplace(std::move(current_key), value);
chassert(inserted, "Duplicate keys in stat file");
}
return metrics;
}
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
{
const auto all_metrics = readAllMetricsFromStatFile(buf);
if (const auto it = all_metrics.find(key); it != all_metrics.end())
return it->second;
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName());
}
struct CgroupsV1Reader : ICgroupsReader
{
explicit CgroupsV1Reader(const fs::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { }
uint64_t readMemoryUsage() override
{
std::lock_guard lock(mutex);
buf.rewind();
return readMetricFromStatFile(buf, "rss");
}
std::string dumpAllStats() override
{
std::lock_guard lock(mutex);
buf.rewind();
return fmt::format("{}", readAllMetricsFromStatFile(buf));
}
private:
std::mutex mutex;
ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
};
struct CgroupsV2Reader : ICgroupsReader
{
explicit CgroupsV2Reader(const fs::path & stat_file_dir)
: current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat")
{
}
uint64_t readMemoryUsage() override
{
std::lock_guard lock(mutex);
current_buf.rewind();
stat_buf.rewind();
int64_t mem_usage = 0;
/// memory.current contains a single number
/// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667
readIntText(mem_usage, current_buf);
mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file");
chassert(mem_usage >= 0, "Negative memory usage");
return mem_usage;
}
std::string dumpAllStats() override
{
std::lock_guard lock(mutex);
stat_buf.rewind();
return fmt::format("{}", readAllMetricsFromStatFile(stat_buf));
}
private:
std::mutex mutex;
ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex);
ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex);
};
/// Caveats:
/// - All of the logic in this file assumes that the current process is the only process in the
/// containing cgroup (or more precisely: the only process with significant memory consumption).
/// If this is not the case, then other processes' memory consumption may affect the internal
/// memory tracker ...
/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a
/// decade and will go away at some point, hierarchical detection is only implemented for v2.
/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such
/// systems existed only for a short transition period.
std::optional<std::string> getCgroupsV1Path()
{
auto path = default_cgroups_mount / "memory/memory.stat";
if (!fs::exists(path))
return {};
return {default_cgroups_mount / "memory"};
}
std::pair<std::string, CgroupsMemoryUsageObserver::CgroupsVersion> getCgroupsPath()
{
auto v2_path = getCgroupsV2PathContainingFile("memory.current");
if (v2_path.has_value())
return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2};
auto v1_path = getCgroupsV1Path();
if (v1_path.has_value())
return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1};
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file");
}
}
namespace DB
{
CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_)
: log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_)
{
const auto [cgroup_path, version] = getCgroupsPath();
cgroup_reader = createCgroupsReader(version, cgroup_path);
LOG_INFO(
log,
"Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec",
cgroup_path,
(version == CgroupsVersion::V1) ? "v1" : "v2",
wait_time.count());
}
{}
CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver()
{
stopThread();
}
void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_)
{
std::lock_guard<std::mutex> limit_lock(limit_mutex);
if (hard_limit_ == hard_limit && soft_limit_ == soft_limit)
return;
hard_limit = hard_limit_;
soft_limit = soft_limit_;
on_hard_limit = [this, hard_limit_](bool up)
{
if (up)
{
LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_));
/// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are
/// really zero. Trying to avoid OOM ...
MemoryTracker::setRSS(hard_limit_, 0);
}
else
{
LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_));
}
};
on_soft_limit = [this, soft_limit_](bool up)
{
if (up)
{
LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_));
# if USE_JEMALLOC
LOG_INFO(log, "Purging jemalloc arenas");
mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
# endif
/// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them.
uint64_t memory_usage = cgroup_reader->readMemoryUsage();
LOG_TRACE(
log,
"Read current memory usage {} bytes ({}) from cgroups, full available stats: {}",
memory_usage,
ReadableSize(memory_usage),
cgroup_reader->dumpAllStats());
MemoryTracker::setRSS(memory_usage, 0);
LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage));
}
else
{
LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_));
}
};
LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_));
}
void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_)
{
std::lock_guard<std::mutex> memory_amount_available_changed_lock(memory_amount_available_changed_mutex);
@ -300,35 +82,6 @@ void CgroupsMemoryUsageObserver::runThread()
std::lock_guard<std::mutex> memory_amount_available_changed_lock(memory_amount_available_changed_mutex);
on_memory_amount_available_changed();
}
std::lock_guard<std::mutex> limit_lock(limit_mutex);
if (soft_limit > 0 && hard_limit > 0)
{
uint64_t memory_usage = cgroup_reader->readMemoryUsage();
LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage));
if (memory_usage > hard_limit)
{
if (last_memory_usage <= hard_limit)
on_hard_limit(true);
}
else
{
if (last_memory_usage > hard_limit)
on_hard_limit(false);
}
if (memory_usage > soft_limit)
{
if (last_memory_usage <= soft_limit)
on_soft_limit(true);
}
else
{
if (last_memory_usage > soft_limit)
on_soft_limit(false);
}
last_memory_usage = memory_usage;
}
}
catch (...)
{
@ -337,13 +90,6 @@ void CgroupsMemoryUsageObserver::runThread()
}
}
std::unique_ptr<ICgroupsReader> createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const fs::path & cgroup_path)
{
if (version == CgroupsMemoryUsageObserver::CgroupsVersion::V2)
return std::make_unique<CgroupsV2Reader>(cgroup_path);
else
return std::make_unique<CgroupsV1Reader>(cgroup_path);
}
}
#endif

View File

@ -3,30 +3,12 @@
#include <Common/ThreadPool.h>
#include <chrono>
#include <memory>
#include <mutex>
namespace DB
{
struct ICgroupsReader
{
virtual ~ICgroupsReader() = default;
virtual uint64_t readMemoryUsage() = 0;
virtual std::string dumpAllStats() = 0;
};
/// Does two things:
/// 1. Periodically reads the memory usage of the process from Linux cgroups.
/// You can specify soft or hard memory limits:
/// - When the soft memory limit is hit, drop jemalloc cache.
/// - When the hard memory limit is hit, update MemoryTracking metric to throw memory exceptions faster.
/// The goal of this is to avoid that the process hits the maximum allowed memory limit at which there is a good
/// chance that the Linux OOM killer terminates it. All of this is done because internal memory tracking in
/// ClickHouse can unfortunately underestimate the actually used memory.
/// 2. Periodically reads the maximum memory available to the process (which can change due to cgroups settings).
/// Periodically reads the maximum memory available to the process (which can change due to cgroups settings).
/// You can specify a callback to react to changes. The callback typically reloads the configuration, i.e. Server
/// or Keeper configuration file. This reloads settings 'max_server_memory_usage' (Server) and 'max_memory_usage_soft_limit'
/// (Keeper) from which various other internal limits are calculated, including the soft and hard limits for (1.).
@ -37,19 +19,11 @@ struct ICgroupsReader
class CgroupsMemoryUsageObserver
{
public:
using OnMemoryLimitFn = std::function<void(bool)>;
using OnMemoryAmountAvailableChangedFn = std::function<void()>;
enum class CgroupsVersion : uint8_t
{
V1,
V2
};
explicit CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_);
~CgroupsMemoryUsageObserver();
void setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_);
void setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_);
void startThread();
@ -60,32 +34,22 @@ private:
const std::chrono::seconds wait_time;
std::mutex limit_mutex;
size_t hard_limit TSA_GUARDED_BY(limit_mutex) = 0;
size_t soft_limit TSA_GUARDED_BY(limit_mutex) = 0;
OnMemoryLimitFn on_hard_limit TSA_GUARDED_BY(limit_mutex);
OnMemoryLimitFn on_soft_limit TSA_GUARDED_BY(limit_mutex);
std::mutex memory_amount_available_changed_mutex;
OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed TSA_GUARDED_BY(memory_amount_available_changed_mutex);
uint64_t last_memory_usage = 0; /// how much memory does the process use
uint64_t last_available_memory_amount; /// how much memory can the process use
void stopThread();
void runThread();
std::unique_ptr<ICgroupsReader> cgroup_reader;
std::mutex thread_mutex;
std::condition_variable cond;
ThreadFromGlobalPool thread;
bool quit = false;
};
std::unique_ptr<ICgroupsReader>
createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const std::filesystem::path & cgroup_path);
#else
class CgroupsMemoryUsageObserver
{

View File

@ -609,6 +609,7 @@
M(728, UNEXPECTED_DATA_TYPE) \
M(729, ILLEGAL_TIME_SERIES_TAGS) \
M(730, REFRESH_FAILED) \
M(731, QUERY_CACHE_USED_WITH_NON_THROW_OVERFLOW_MODE) \
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \

View File

@ -5,7 +5,6 @@
#include <Common/Exception.h>
#include <Common/Stopwatch.h>
#include <Common/logger_useful.h>
#include <jemalloc/jemalloc.h>
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
@ -26,7 +25,6 @@ namespace ErrorCodes
void purgeJemallocArenas()
{
LOG_TRACE(getLogger("SystemJemalloc"), "Purging unused memory");
Stopwatch watch;
mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge);
@ -46,20 +44,6 @@ void checkJemallocProfilingEnabled()
"set: MALLOC_CONF=background_thread:true,prof:true");
}
template <typename T>
void setJemallocValue(const char * name, T value)
{
T old_value;
size_t old_value_size = sizeof(T);
if (mallctl(name, &old_value, &old_value_size, reinterpret_cast<void*>(&value), sizeof(T)))
{
LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name);
return;
}
LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value);
}
void setJemallocProfileActive(bool value)
{
checkJemallocProfilingEnabled();

View File

@ -5,6 +5,8 @@
#if USE_JEMALLOC
#include <string>
#include <Common/logger_useful.h>
#include <jemalloc/jemalloc.h>
namespace DB
{
@ -21,6 +23,59 @@ void setJemallocBackgroundThreads(bool enabled);
void setJemallocMaxBackgroundThreads(size_t max_threads);
template <typename T>
void setJemallocValue(const char * name, T value)
{
T old_value;
size_t old_value_size = sizeof(T);
mallctl(name, &old_value, &old_value_size, reinterpret_cast<void*>(&value), sizeof(T));
LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value);
}
template <typename T>
T getJemallocValue(const char * name)
{
T value;
size_t value_size = sizeof(T);
mallctl(name, &value, &value_size, nullptr, 0);
return value;
}
/// Each mallctl call consists of string name lookup which can be expensive.
/// This can be avoided by translating name to "Management Information Base" (MIB)
/// and using it in mallctlbymib calls
template <typename T>
struct JemallocMibCache
{
explicit JemallocMibCache(const char * name)
{
mallctlnametomib(name, mib, &mib_length);
}
void setValue(T value)
{
mallctlbymib(mib, mib_length, nullptr, nullptr, reinterpret_cast<void*>(&value), sizeof(T));
}
T getValue()
{
T value;
size_t value_size = sizeof(T);
mallctlbymib(mib, mib_length, &value, &value_size, nullptr, 0);
return value;
}
void run()
{
mallctlbymib(mib, mib_length, nullptr, nullptr, nullptr, 0);
}
private:
static constexpr size_t max_mib_length = 4;
size_t mib[max_mib_length];
size_t mib_length = max_mib_length;
};
}
#endif

View File

@ -20,13 +20,9 @@
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
#endif
#include <atomic>
#include <cmath>
#include <random>
#include <cstdlib>
#include <string>
@ -115,8 +111,6 @@ void AllocationTrace::onFreeImpl(void * ptr, size_t size) const
namespace ProfileEvents
{
extern const Event QueryMemoryLimitExceeded;
extern const Event MemoryAllocatorPurge;
extern const Event MemoryAllocatorPurgeTimeMicroseconds;
}
using namespace std::chrono_literals;
@ -126,15 +120,13 @@ static constexpr size_t log_peak_memory_usage_every = 1ULL << 30;
MemoryTracker total_memory_tracker(nullptr, VariableContext::Global);
MemoryTracker background_memory_tracker(&total_memory_tracker, VariableContext::User, false);
std::atomic<Int64> MemoryTracker::free_memory_in_allocator_arenas;
MemoryTracker::MemoryTracker(VariableContext level_) : parent(&total_memory_tracker), level(level_) {}
MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) : parent(parent_), level(level_) {}
MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_, bool log_peak_memory_usage_in_destructor_)
: parent(parent_)
, log_peak_memory_usage_in_destructor(log_peak_memory_usage_in_destructor_)
, level(level_)
{}
: parent(parent_), log_peak_memory_usage_in_destructor(log_peak_memory_usage_in_destructor_), level(level_)
{
}
MemoryTracker::~MemoryTracker()
{
@ -204,10 +196,14 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]
return;
MemoryTrackerBlockerInThread blocker(VariableContext::Global);
LOG_TEST(getLogger("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, "
"it may lead to OOM. Stack trace: {}", size, StackTrace().toString());
LOG_TEST(
getLogger("MemoryTracker"),
"Too big allocation ({} bytes) without checking memory limits, "
"it may lead to OOM. Stack trace: {}",
size,
StackTrace().toString());
#else
return; /// Avoid trash logging in release builds
/// Avoid trash logging in release builds
#endif
}
@ -228,6 +224,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
{
/// For global memory tracker always update memory usage.
amount.fetch_add(size, std::memory_order_relaxed);
rss.fetch_add(size, std::memory_order_relaxed);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
@ -249,6 +246,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
* So, we allow over-allocations.
*/
Int64 will_be = size ? size + amount.fetch_add(size, std::memory_order_relaxed) : amount.load(std::memory_order_relaxed);
Int64 will_be_rss = size ? size + rss.fetch_add(size, std::memory_order_relaxed) : rss.load(std::memory_order_relaxed);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end() && size)
@ -275,6 +273,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
{
/// Revert
amount.fetch_sub(size, std::memory_order_relaxed);
rss.fetch_sub(size, std::memory_order_relaxed);
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
@ -297,33 +296,8 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
}
}
Int64 limit_to_check = current_hard_limit;
#if USE_JEMALLOC
if (level == VariableContext::Global && allow_use_jemalloc_memory.load(std::memory_order_relaxed))
{
/// Jemalloc arenas may keep some extra memory.
/// This memory was subtracted from RSS to decrease memory drift.
/// In case memory is close to the limit, try to purge the arenas.
/// This is needed to avoid OOM, because some allocations are directly done with mmap.
Int64 current_free_memory_in_allocator_arenas = free_memory_in_allocator_arenas.load(std::memory_order_relaxed);
if (current_free_memory_in_allocator_arenas > 0 && current_hard_limit && current_free_memory_in_allocator_arenas + will_be > current_hard_limit)
{
if (free_memory_in_allocator_arenas.exchange(-current_free_memory_in_allocator_arenas) > 0)
{
Stopwatch watch;
mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge);
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds());
}
}
limit_to_check += abs(current_free_memory_in_allocator_arenas);
}
#endif
if (unlikely(current_hard_limit && will_be > limit_to_check))
if (unlikely(
current_hard_limit && (will_be > current_hard_limit || (level == VariableContext::Global && will_be_rss > current_hard_limit))))
{
if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded)
{
@ -335,6 +309,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
{
/// Revert
amount.fetch_sub(size, std::memory_order_relaxed);
rss.fetch_sub(size, std::memory_order_relaxed);
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
@ -343,12 +318,13 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
throw DB::Exception(
DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED,
"Memory limit{}{} exceeded: "
"would use {} (attempt to allocate chunk of {} bytes), maximum: {}."
"would use {} (attempt to allocate chunk of {} bytes), current RSS {}, maximum: {}."
"{}{}",
description ? " " : "",
description ? description : "",
formatReadableSizeWithBinarySuffix(will_be),
size,
formatReadableSizeWithBinarySuffix(rss.load(std::memory_order_relaxed)),
formatReadableSizeWithBinarySuffix(current_hard_limit),
overcommit_result == OvercommitResult::NONE ? "" : " OvercommitTracker decision: ",
toDescription(overcommit_result));
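
In effect, the arena-purging fallback above is replaced by a direct check: the allocation fails when either the tracked amount or, for the global tracker, the sampled RSS would exceed the hard limit. A minimal standalone sketch of that predicate (simplified names, not the actual MemoryTracker interface):

#include <cstdint>
/// Hypothetical, simplified form of the new limit check in allocImpl;
/// `is_global` stands in for `level == VariableContext::Global`.
bool exceedsHardLimit(int64_t will_be, int64_t will_be_rss, int64_t hard_limit, bool is_global)
{
    if (hard_limit == 0) /// 0 means no limit configured
        return false;
    if (will_be > hard_limit) /// tracked allocations alone exceed the limit
        return true;
    /// Only the global tracker maintains an RSS counter, so only it checks RSS.
    return is_global && will_be_rss > hard_limit;
}
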
@ -442,6 +418,7 @@ AllocationTrace MemoryTracker::free(Int64 size, double _sample_probability)
{
/// For global memory tracker always update memory usage.
amount.fetch_sub(size, std::memory_order_relaxed);
rss.fetch_sub(size, std::memory_order_relaxed);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::sub(metric_loaded, size);
@ -455,7 +432,12 @@ AllocationTrace MemoryTracker::free(Int64 size, double _sample_probability)
}
Int64 accounted_size = size;
if (level == VariableContext::Thread || level == VariableContext::Global)
if (level == VariableContext::Global)
{
amount.fetch_sub(accounted_size, std::memory_order_relaxed);
rss.fetch_sub(accounted_size, std::memory_order_relaxed);
}
else if (level == VariableContext::Thread)
{
/// Could become negative if memory allocated in this thread is freed in another one
amount.fetch_sub(accounted_size, std::memory_order_relaxed);
@ -529,21 +511,29 @@ void MemoryTracker::reset()
}
void MemoryTracker::setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_)
void MemoryTracker::updateRSS(Int64 rss_)
{
Int64 new_amount = rss_;
total_memory_tracker.rss.store(rss_, std::memory_order_relaxed);
}
void MemoryTracker::updateAllocated(Int64 allocated_)
{
Int64 new_amount = allocated_;
LOG_INFO(
getLogger("MemoryTracker"),
"Correcting the value of global memory tracker from {} to {}",
ReadableSize(total_memory_tracker.amount.load(std::memory_order_relaxed)),
ReadableSize(allocated_));
total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed);
free_memory_in_allocator_arenas.store(free_memory_in_allocator_arenas_, std::memory_order_relaxed);
auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::set(metric_loaded, new_amount);
bool log_memory_usage = true;
total_memory_tracker.updatePeak(rss_, log_memory_usage);
total_memory_tracker.updatePeak(new_amount, log_memory_usage);
}
void MemoryTracker::setSoftLimit(Int64 value)
{
soft_limit.store(value, std::memory_order_relaxed);


@ -2,7 +2,6 @@
#include <atomic>
#include <chrono>
#include <optional>
#include <base/types.h>
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>
@ -57,9 +56,8 @@ private:
std::atomic<Int64> soft_limit {0};
std::atomic<Int64> hard_limit {0};
std::atomic<Int64> profiler_limit {0};
std::atomic_bool allow_use_jemalloc_memory {true};
static std::atomic<Int64> free_memory_in_allocator_arenas;
std::atomic<Int64> rss{0};
Int64 profiler_step = 0;
@ -122,6 +120,11 @@ public:
return amount.load(std::memory_order_relaxed);
}
Int64 getRSS() const
{
return rss.load(std::memory_order_relaxed);
}
// Merges and mutations may pass memory ownership to other threads thus in the end of execution
// MemoryTracker for background task may have a non-zero counter.
// This method is intended to fix the counter inside of background_memory_tracker.
@ -154,14 +157,6 @@ public:
{
return soft_limit.load(std::memory_order_relaxed);
}
void setAllowUseJemallocMemory(bool value)
{
allow_use_jemalloc_memory.store(value, std::memory_order_relaxed);
}
bool getAllowUseJemallocMmemory() const
{
return allow_use_jemalloc_memory.load(std::memory_order_relaxed);
}
/** Set limit if it was not set.
* Otherwise, set limit to new value, if new value is greater than previous limit.
@ -249,10 +244,9 @@ public:
/// Reset the accumulated data.
void reset();
/// Reset current counter to an RSS value.
/// Jemalloc may have pre-allocated arenas, they are accounted in RSS.
/// We can free this arenas in case of exception to avoid OOM.
static void setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_);
/// update values based on external information (e.g. jemalloc's stat)
static void updateRSS(Int64 rss_);
static void updateAllocated(Int64 allocated_);
/// Prints info about peak memory consumption into log.
void logPeakMemoryUsage();

src/Common/MemoryWorker.cpp (new file, 333 lines)

@ -0,0 +1,333 @@
#include <Common/MemoryWorker.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
#include <base/cgroupsv2.h>
#include <Common/Jemalloc.h>
#include <Common/MemoryTracker.h>
#include <Common/ProfileEvents.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <fmt/ranges.h>
#include <filesystem>
#include <optional>
namespace fs = std::filesystem;
namespace ProfileEvents
{
extern const Event MemoryAllocatorPurge;
extern const Event MemoryAllocatorPurgeTimeMicroseconds;
extern const Event MemoryWorkerRun;
extern const Event MemoryWorkerRunElapsedMicroseconds;
}
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int LOGICAL_ERROR;
}
#if defined(OS_LINUX)
namespace
{
using Metrics = std::map<std::string, uint64_t>;
/// Format is
/// kernel 5
/// rss 15
/// [...]
Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf)
{
Metrics metrics;
while (!buf.eof())
{
std::string current_key;
readStringUntilWhitespace(current_key, buf);
assertChar(' ', buf);
uint64_t value = 0;
readIntText(value, buf);
assertChar('\n', buf);
auto [_, inserted] = metrics.emplace(std::move(current_key), value);
chassert(inserted, "Duplicate keys in stat file");
}
return metrics;
}
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, std::string_view key)
{
while (!buf.eof())
{
std::string current_key;
readStringUntilWhitespace(current_key, buf);
if (current_key != key)
{
std::string dummy;
readStringUntilNewlineInto(dummy, buf);
buf.ignore();
continue;
}
assertChar(' ', buf);
uint64_t value = 0;
readIntText(value, buf);
return value;
}
LOG_ERROR(getLogger("CgroupsReader"), "Cannot find '{}' in '{}'", key, buf.getFileName());
return 0;
}
struct CgroupsV1Reader : ICgroupsReader
{
explicit CgroupsV1Reader(const fs::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { }
uint64_t readMemoryUsage() override
{
std::lock_guard lock(mutex);
buf.rewind();
return readMetricFromStatFile(buf, "rss");
}
std::string dumpAllStats() override
{
std::lock_guard lock(mutex);
buf.rewind();
return fmt::format("{}", readAllMetricsFromStatFile(buf));
}
private:
std::mutex mutex;
ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
};
struct CgroupsV2Reader : ICgroupsReader
{
explicit CgroupsV2Reader(const fs::path & stat_file_dir) : stat_buf(stat_file_dir / "memory.stat") { }
uint64_t readMemoryUsage() override
{
std::lock_guard lock(mutex);
stat_buf.rewind();
return readMetricFromStatFile(stat_buf, "anon");
}
std::string dumpAllStats() override
{
std::lock_guard lock(mutex);
stat_buf.rewind();
return fmt::format("{}", readAllMetricsFromStatFile(stat_buf));
}
private:
std::mutex mutex;
ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex);
};
/// Caveats:
/// - All of the logic in this file assumes that the current process is the only process in the
/// containing cgroup (or more precisely: the only process with significant memory consumption).
/// If this is not the case, then other processes' memory consumption may affect the internal
/// memory tracker ...
/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 has been deprecated for over half a
/// decade and will go away at some point, hierarchical detection is only implemented for v2.
/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such
/// systems existed only for a short transition period.
std::optional<std::string> getCgroupsV1Path()
{
auto path = default_cgroups_mount / "memory/memory.stat";
if (!fs::exists(path))
return {};
return {default_cgroups_mount / "memory"};
}
std::pair<std::string, ICgroupsReader::CgroupsVersion> getCgroupsPath()
{
auto v2_path = getCgroupsV2PathContainingFile("memory.current");
if (v2_path.has_value())
return {*v2_path, ICgroupsReader::CgroupsVersion::V2};
auto v1_path = getCgroupsV1Path();
if (v1_path.has_value())
return {*v1_path, ICgroupsReader::CgroupsVersion::V1};
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file");
}
}
std::shared_ptr<ICgroupsReader> ICgroupsReader::createCgroupsReader(ICgroupsReader::CgroupsVersion version, const std::filesystem::path & cgroup_path)
{
if (version == CgroupsVersion::V2)
return std::make_shared<CgroupsV2Reader>(cgroup_path);
else
{
chassert(version == CgroupsVersion::V1);
return std::make_shared<CgroupsV1Reader>(cgroup_path);
}
}
#endif
namespace
{
std::string_view sourceToString(MemoryWorker::MemoryUsageSource source)
{
switch (source)
{
case MemoryWorker::MemoryUsageSource::Cgroups: return "Cgroups";
case MemoryWorker::MemoryUsageSource::Jemalloc: return "Jemalloc";
case MemoryWorker::MemoryUsageSource::None: return "None";
}
}
}
/// We try to pick the best possible supported source for reading memory usage.
/// Supported sources in order of priority:
/// - reading from cgroups' pseudo-files (fastest and most accurate)
/// - reading jemalloc's resident stat (doesn't take into account allocations that didn't use jemalloc)
/// Also, different tick rates are used because not all options are equally fast.
MemoryWorker::MemoryWorker(uint64_t period_ms_)
: log(getLogger("MemoryWorker"))
, period_ms(period_ms_)
{
#if defined(OS_LINUX)
try
{
static constexpr uint64_t cgroups_memory_usage_tick_ms{50};
const auto [cgroup_path, version] = getCgroupsPath();
LOG_INFO(
getLogger("CgroupsReader"),
"Will create cgroup reader from '{}' (cgroups version: {})",
cgroup_path,
(version == ICgroupsReader::CgroupsVersion::V1) ? "v1" : "v2");
cgroups_reader = ICgroupsReader::createCgroupsReader(version, cgroup_path);
source = MemoryUsageSource::Cgroups;
if (period_ms == 0)
period_ms = cgroups_memory_usage_tick_ms;
return;
}
catch (...)
{
tryLogCurrentException(log, "Cannot use cgroups reader");
}
#endif
#if USE_JEMALLOC
static constexpr uint64_t jemalloc_memory_usage_tick_ms{100};
source = MemoryUsageSource::Jemalloc;
if (period_ms == 0)
period_ms = jemalloc_memory_usage_tick_ms;
#endif
}
MemoryWorker::MemoryUsageSource MemoryWorker::getSource()
{
return source;
}
void MemoryWorker::start()
{
if (source == MemoryUsageSource::None)
return;
LOG_INFO(
getLogger("MemoryWorker"),
"Starting background memory thread with period of {}ms, using {} as source",
period_ms,
sourceToString(source));
background_thread = ThreadFromGlobalPool([this] { backgroundThread(); });
}
MemoryWorker::~MemoryWorker()
{
{
std::unique_lock lock(mutex);
shutdown = true;
}
cv.notify_all();
if (background_thread.joinable())
background_thread.join();
}
uint64_t MemoryWorker::getMemoryUsage()
{
switch (source)
{
case MemoryUsageSource::Cgroups:
return cgroups_reader != nullptr ? cgroups_reader->readMemoryUsage() : 0;
case MemoryUsageSource::Jemalloc:
#if USE_JEMALLOC
return resident_mib.getValue();
#else
return 0;
#endif
case MemoryUsageSource::None:
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Trying to fetch memory usage while no memory source can be used");
}
}
void MemoryWorker::backgroundThread()
{
std::chrono::milliseconds chrono_period_ms{period_ms};
[[maybe_unused]] bool first_run = true;
std::unique_lock lock(mutex);
while (true)
{
cv.wait_for(lock, chrono_period_ms, [this] { return shutdown; });
if (shutdown)
return;
Stopwatch total_watch;
#if USE_JEMALLOC
if (source == MemoryUsageSource::Jemalloc)
epoch_mib.setValue(0);
#endif
Int64 resident = getMemoryUsage();
MemoryTracker::updateRSS(resident);
#if USE_JEMALLOC
if (resident > total_memory_tracker.getHardLimit())
{
Stopwatch purge_watch;
purge_mib.run();
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge);
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds());
}
#endif
#if USE_JEMALLOC
if (unlikely(first_run || total_memory_tracker.get() < 0))
{
if (source != MemoryUsageSource::Jemalloc)
epoch_mib.setValue(0);
MemoryTracker::updateAllocated(allocated_mib.getValue());
}
#endif
ProfileEvents::increment(ProfileEvents::MemoryWorkerRun);
ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds());
first_run = false;
}
}
}
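
For readers without ClickHouse's ReadBuffer helpers at hand, here is a self-contained sketch of the same "key value" stat-file parsing using only the standard library; it is an illustrative re-implementation under that assumption, not the shipped code:

#include <cstdint>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

/// Parse "key value\n" pairs as found in cgroup memory.stat files.
std::map<std::string, uint64_t> parseStatFile(std::istream & in)
{
    std::map<std::string, uint64_t> metrics;
    std::string key;
    uint64_t value;
    while (in >> key >> value)
        metrics.emplace(std::move(key), value);
    return metrics;
}

int main()
{
    std::istringstream stat("anon 10429399040\nfile 512\nkernel 5\n");
    auto metrics = parseStatFile(stat);
    std::cout << "anon = " << metrics["anon"] << "\n"; /// cgroups v2 resident anonymous memory
}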

src/Common/MemoryWorker.h (new file, 84 lines)

@ -0,0 +1,84 @@
#pragma once
#include <Common/CgroupsMemoryUsageObserver.h>
#include <Common/ThreadPool.h>
#include <Common/Jemalloc.h>
namespace DB
{
struct ICgroupsReader
{
enum class CgroupsVersion : uint8_t
{
V1,
V2
};
#if defined(OS_LINUX)
static std::shared_ptr<ICgroupsReader>
createCgroupsReader(ICgroupsReader::CgroupsVersion version, const std::filesystem::path & cgroup_path);
#endif
virtual ~ICgroupsReader() = default;
virtual uint64_t readMemoryUsage() = 0;
virtual std::string dumpAllStats() = 0;
};
/// Correct MemoryTracker based on external information (e.g. Cgroups or stats.resident from jemalloc)
/// The worker spawns a background thread which periodically reads current resident memory from the source,
/// whose value is sent to global MemoryTracker.
/// It can do additional things like purging jemalloc dirty pages if the current memory usage is higher than global hard limit.
class MemoryWorker
{
public:
explicit MemoryWorker(uint64_t period_ms_);
enum class MemoryUsageSource : uint8_t
{
None,
Cgroups,
Jemalloc
};
MemoryUsageSource getSource();
void start();
~MemoryWorker();
private:
uint64_t getMemoryUsage();
void backgroundThread();
ThreadFromGlobalPool background_thread;
std::mutex mutex;
std::condition_variable cv;
bool shutdown = false;
LoggerPtr log;
uint64_t period_ms;
MemoryUsageSource source{MemoryUsageSource::None};
std::shared_ptr<ICgroupsReader> cgroups_reader;
#if USE_JEMALLOC
JemallocMibCache<uint64_t> epoch_mib{"epoch"};
JemallocMibCache<size_t> resident_mib{"stats.resident"};
JemallocMibCache<size_t> allocated_mib{"stats.allocated"};
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
JemallocMibCache<size_t> purge_mib{"arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"};
#undef STRINGIFY
#undef STRINGIFY_HELPER
#endif
};
}
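
A hedged usage sketch of the class declared above; the wiring and the period value are illustrative assumptions, e.g. during server startup:

/// Passing 0 lets the worker pick a per-source tick rate (50ms for cgroups, 100ms for jemalloc).
DB::MemoryWorker memory_worker(/*period_ms_=*/0);
if (memory_worker.getSource() != DB::MemoryWorker::MemoryUsageSource::None)
    memory_worker.start(); /// spawns the background thread that feeds MemoryTracker
/// The destructor sets `shutdown`, notifies the condition variable, and joins the thread.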


@ -122,12 +122,18 @@ public:
return result.entry.isNull() || !result.is_usable || (skip_read_only_replicas && result.is_readonly);
}
void checkTryResultIsValid(const TryResult & result, bool skip_read_only_replicas) const
TryResult getValidTryResult(const std::vector<TryResult> & results, bool skip_read_only_replicas) const
{
if (results.empty())
throw DB::Exception(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Cannot get any valid connection because all connection tries failed");
auto result = results.front();
if (isTryResultInvalid(result, skip_read_only_replicas))
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
"Got an invalid connection result: entry.isNull {}, is_usable {}, is_up_to_date {}, delay {}, is_readonly {}, skip_read_only_replicas {}",
result.entry.isNull(), result.is_usable, result.is_up_to_date, result.delay, result.is_readonly, skip_read_only_replicas);
return result;
}
size_t getPoolSize() const { return nested_pools.size(); }


@ -827,6 +827,9 @@ The server successfully detected this situation and will download merged part fr
M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan") \
M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)") \
M(GWPAsanFree, "Number of free operations done by GWPAsan") \
\
M(MemoryWorkerRun, "Number of runs done by MemoryWorker in background") \
M(MemoryWorkerRunElapsedMicroseconds, "Total time spent by MemoryWorker for background work") \
#ifdef APPLY_FOR_EXTERNAL_EVENTS


@ -171,6 +171,7 @@ bool isUserError(Error zk_return_code)
void CreateRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void RemoveRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void RemoveRecursiveRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void ExistsRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void GetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void SetRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }


@ -248,6 +248,23 @@ struct RemoveResponse : virtual Response
{
};
struct RemoveRecursiveRequest : virtual Request
{
String path;
/// Strict limit on the number of deleted nodes
uint32_t remove_nodes_limit = 1;
void addRootPath(const String & root_path) override;
String getPath() const override { return path; }
size_t bytesSize() const override { return path.size() + sizeof(remove_nodes_limit); }
};
struct RemoveRecursiveResponse : virtual Response
{
};
struct ExistsRequest : virtual Request
{
String path;
@ -430,6 +447,7 @@ struct ErrorResponse : virtual Response
using CreateCallback = std::function<void(const CreateResponse &)>;
using RemoveCallback = std::function<void(const RemoveResponse &)>;
using RemoveRecursiveCallback = std::function<void(const RemoveRecursiveResponse &)>;
using ExistsCallback = std::function<void(const ExistsResponse &)>;
using GetCallback = std::function<void(const GetResponse &)>;
using SetCallback = std::function<void(const SetResponse &)>;
@ -587,6 +605,11 @@ public:
int32_t version,
RemoveCallback callback) = 0;
virtual void removeRecursive(
const String & path,
uint32_t remove_nodes_limit,
RemoveRecursiveCallback callback) = 0;
virtual void exists(
const String & path,
ExistsCallback callback,


@ -90,6 +90,36 @@ struct TestKeeperRemoveRequest final : RemoveRequest, TestKeeperRequest
}
};
struct TestKeeperRemoveRecursiveRequest final : RemoveRecursiveRequest, TestKeeperRequest
{
TestKeeperRemoveRecursiveRequest() = default;
explicit TestKeeperRemoveRecursiveRequest(const RemoveRecursiveRequest & base) : RemoveRecursiveRequest(base) {}
ResponsePtr createResponse() const override;
std::pair<ResponsePtr, Undo> process(TestKeeper::Container & container, int64_t zxid) const override;
void processWatches(TestKeeper::Watches & node_watches, TestKeeper::Watches & list_watches) const override
{
std::vector<std::pair<String, size_t>> deleted;
auto add_deleted_watches = [&](TestKeeper::Watches & w)
{
for (const auto & [watch_path, _] : w)
if (watch_path.starts_with(path))
deleted.emplace_back(watch_path, std::count(watch_path.begin(), watch_path.end(), '/'));
};
add_deleted_watches(node_watches);
add_deleted_watches(list_watches);
std::sort(deleted.begin(), deleted.end(), [](const auto & lhs, const auto & rhs)
{
return lhs.second < rhs.second;
});
for (const auto & [watch_path, _] : deleted)
processWatchesImpl(watch_path, node_watches, list_watches);
}
};
struct TestKeeperExistsRequest final : ExistsRequest, TestKeeperRequest
{
ResponsePtr createResponse() const override;
@ -175,6 +205,10 @@ struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest
{
requests.push_back(std::make_shared<TestKeeperRemoveRequest>(*concrete_request_remove));
}
else if (const auto * concrete_request_remove_recursive = dynamic_cast<const RemoveRecursiveRequest *>(generic_request.get()))
{
requests.push_back(std::make_shared<TestKeeperRemoveRecursiveRequest>(*concrete_request_remove_recursive));
}
else if (const auto * concrete_request_set = dynamic_cast<const SetRequest *>(generic_request.get()))
{
requests.push_back(std::make_shared<TestKeeperSetRequest>(*concrete_request_set));
@ -313,6 +347,62 @@ std::pair<ResponsePtr, Undo> TestKeeperRemoveRequest::process(TestKeeper::Contai
return { std::make_shared<RemoveResponse>(response), undo };
}
std::pair<ResponsePtr, Undo> TestKeeperRemoveRecursiveRequest::process(TestKeeper::Container & container, int64_t zxid) const
{
RemoveRecursiveResponse response;
response.zxid = zxid;
Undo undo;
auto root_it = container.find(path);
if (root_it == container.end())
{
response.error = Error::ZNONODE;
return { std::make_shared<RemoveRecursiveResponse>(response), undo };
}
std::vector<std::pair<std::string, Coordination::TestKeeper::Node>> children;
for (auto it = std::next(root_it); it != container.end(); ++it)
{
const auto & [child_path, child_node] = *it;
if (child_path.starts_with(path))
children.emplace_back(child_path, child_node);
else
break;
}
if (children.size() > remove_nodes_limit)
{
response.error = Error::ZNOTEMPTY;
return { std::make_shared<RemoveRecursiveResponse>(response), undo };
}
auto & parent = container.at(parentPath(path));
--parent.stat.numChildren;
++parent.stat.cversion;
for (const auto & [child_path, child_node] : children)
{
auto child_it = container.find(child_path);
chassert(child_it != container.end());
container.erase(child_it);
}
response.error = Error::ZOK;
undo = [&container, dead = std::move(children), root_path = path]()
{
for (auto && [child_path, child_node] : dead)
container.emplace(child_path, child_node);
auto & undo_parent = container.at(parentPath(root_path));
++undo_parent.stat.numChildren;
--undo_parent.stat.cversion;
};
return { std::make_shared<RemoveRecursiveResponse>(response), undo };
}
std::pair<ResponsePtr, Undo> TestKeeperExistsRequest::process(TestKeeper::Container & container, int64_t zxid) const
{
ExistsResponse response;
@ -530,6 +620,7 @@ std::pair<ResponsePtr, Undo> TestKeeperMultiRequest::process(TestKeeper::Contain
ResponsePtr TestKeeperCreateRequest::createResponse() const { return std::make_shared<CreateResponse>(); }
ResponsePtr TestKeeperRemoveRequest::createResponse() const { return std::make_shared<RemoveResponse>(); }
ResponsePtr TestKeeperRemoveRecursiveRequest::createResponse() const { return std::make_shared<RemoveRecursiveResponse>(); }
ResponsePtr TestKeeperExistsRequest::createResponse() const { return std::make_shared<ExistsResponse>(); }
ResponsePtr TestKeeperGetRequest::createResponse() const { return std::make_shared<GetResponse>(); }
ResponsePtr TestKeeperSetRequest::createResponse() const { return std::make_shared<SetResponse>(); }
@ -771,6 +862,21 @@ void TestKeeper::remove(
pushRequest(std::move(request_info));
}
void TestKeeper::removeRecursive(
const String & path,
uint32_t remove_nodes_limit,
RemoveRecursiveCallback callback)
{
TestKeeperRemoveRecursiveRequest request;
request.path = path;
request.remove_nodes_limit = remove_nodes_limit;
RequestInfo request_info;
request_info.request = std::make_shared<TestKeeperRemoveRecursiveRequest>(std::move(request));
request_info.callback = [callback](const Response & response) { callback(dynamic_cast<const RemoveRecursiveResponse &>(response)); };
pushRequest(std::move(request_info));
}
void TestKeeper::exists(
const String & path,
ExistsCallback callback,


@ -58,6 +58,11 @@ public:
int32_t version,
RemoveCallback callback) override;
void removeRecursive(
const String & path,
uint32_t remove_nodes_limit,
RemoveRecursiveCallback callback) override;
void exists(
const String & path,
ExistsCallback callback,


@ -31,6 +31,7 @@ using AsyncResponses = std::vector<std::pair<std::string, std::future<R>>>;
Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode, bool ignore_if_exists = false);
Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version);
Coordination::RequestPtr makeRemoveRecursiveRequest(const std::string & path, uint32_t remove_nodes_limit);
Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version);
Coordination::RequestPtr makeCheckRequest(const std::string & path, int version);


@ -979,18 +979,47 @@ bool ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, bool probab
return removed_as_expected;
}
void ZooKeeper::removeRecursive(const std::string & path)
void ZooKeeper::removeRecursive(const std::string & path, uint32_t remove_nodes_limit)
{
if (!isFeatureEnabled(DB::KeeperFeatureFlag::REMOVE_RECURSIVE))
{
removeChildrenRecursive(path);
remove(path);
return;
}
void ZooKeeper::tryRemoveRecursive(const std::string & path)
check(tryRemoveRecursive(path, remove_nodes_limit), path);
}
Coordination::Error ZooKeeper::tryRemoveRecursive(const std::string & path, uint32_t remove_nodes_limit)
{
if (!isFeatureEnabled(DB::KeeperFeatureFlag::REMOVE_RECURSIVE))
{
tryRemoveChildrenRecursive(path);
tryRemove(path);
return tryRemove(path);
}
auto promise = std::make_shared<std::promise<Coordination::RemoveRecursiveResponse>>();
auto future = promise->get_future();
auto callback = [promise](const Coordination::RemoveRecursiveResponse & response) mutable
{
promise->set_value(response);
};
impl->removeRecursive(path, remove_nodes_limit, std::move(callback));
if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
{
impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::RemoveRecursive, path));
return Coordination::Error::ZOPERATIONTIMEOUT;
}
else
{
auto response = future.get();
return response.error;
}
}
namespace
{
@ -1619,6 +1648,14 @@ Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version
return request;
}
Coordination::RequestPtr makeRemoveRecursiveRequest(const std::string & path, uint32_t remove_nodes_limit)
{
auto request = std::make_shared<Coordination::RemoveRecursiveRequest>();
request->path = path;
request->remove_nodes_limit = remove_nodes_limit;
return request;
}
Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version)
{
auto request = std::make_shared<Coordination::SetRequest>();


@ -479,15 +479,16 @@ public:
Int64 getClientID();
/// Remove the node with the subtree. If someone concurrently adds or removes a node
/// in the subtree, the result is undefined.
void removeRecursive(const std::string & path);
/// Remove the node with the subtree.
/// If Keeper supports the RemoveRecursive operation, it will be performed atomically.
/// Otherwise, if someone concurrently adds or removes a node in the subtree, the result is undefined.
void removeRecursive(const std::string & path, uint32_t remove_nodes_limit = 100);
/// Remove the node with the subtree. If someone concurrently removes a node in the subtree,
/// this will not cause errors.
/// Same as removeRecursive, but if Keeper does not support RemoveRecursive and
/// someone concurrently removes a node in the subtree, this will not cause errors.
/// For instance, you can call this method twice concurrently for the same node and the end
/// result would be the same as for the single call.
void tryRemoveRecursive(const std::string & path);
Coordination::Error tryRemoveRecursive(const std::string & path, uint32_t remove_nodes_limit = 100);
/// Similar to removeRecursive(...) and tryRemoveRecursive(...), but does not remove path itself.
/// Node defined as RemoveException will not be deleted.
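
A short usage sketch of the updated API; the `zookeeper` handle, the path, and the error handling are placeholders:

/// Atomic when the server advertises the REMOVE_RECURSIVE feature flag,
/// client-side children-then-node removal otherwise.
zookeeper->removeRecursive("/some/subtree", /*remove_nodes_limit=*/100);

/// The non-throwing variant now reports the outcome instead of returning void:
auto code = zookeeper->tryRemoveRecursive("/some/subtree");
if (code != Coordination::Error::ZOK)
{
    /// e.g. ZNONODE, ZNOTEMPTY (limit exceeded), or ZOPERATIONTIMEOUT
}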


@ -1,5 +1,5 @@
#include "Common/ZooKeeper/IKeeper.h"
#include "Common/ZooKeeper/ZooKeeperConstants.h"
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/Stopwatch.h>
@ -232,6 +232,27 @@ void ZooKeeperRemoveRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in);
}
void ZooKeeperRemoveRecursiveRequest::writeImpl(WriteBuffer & out) const
{
Coordination::write(path, out);
Coordination::write(remove_nodes_limit, out);
}
void ZooKeeperRemoveRecursiveRequest::readImpl(ReadBuffer & in)
{
Coordination::read(path, in);
Coordination::read(remove_nodes_limit, in);
}
std::string ZooKeeperRemoveRecursiveRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"path = {}\n"
"remove_nodes_limit = {}",
path,
remove_nodes_limit);
}
void ZooKeeperExistsRequest::writeImpl(WriteBuffer & out) const
{
Coordination::write(path, out);
@ -510,6 +531,11 @@ ZooKeeperMultiRequest::ZooKeeperMultiRequest(std::span<const Coordination::Reque
checkOperationType(Write);
requests.push_back(std::make_shared<ZooKeeperRemoveRequest>(*concrete_request_remove));
}
else if (const auto * concrete_request_remove_recursive = dynamic_cast<const RemoveRecursiveRequest *>(generic_request.get()))
{
checkOperationType(Write);
requests.push_back(std::make_shared<ZooKeeperRemoveRecursiveRequest>(*concrete_request_remove_recursive));
}
else if (const auto * concrete_request_set = dynamic_cast<const SetRequest *>(generic_request.get()))
{
checkOperationType(Write);
@ -707,6 +733,7 @@ ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return se
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSyncResponse>()); }
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperAuthResponse>()); }
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveResponse>()); }
ZooKeeperResponsePtr ZooKeeperRemoveRecursiveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveRecursiveResponse>()); }
ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperExistsResponse>()); }
ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetResponse>()); }
ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetResponse>()); }
@ -1024,6 +1051,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
registerZooKeeperRequest<OpNum::SetACL, ZooKeeperSetACLRequest>(*this);
registerZooKeeperRequest<OpNum::FilteredList, ZooKeeperFilteredListRequest>(*this);
registerZooKeeperRequest<OpNum::CheckNotExists, ZooKeeperCheckRequest>(*this);
registerZooKeeperRequest<OpNum::RemoveRecursive, ZooKeeperRemoveRecursiveRequest>(*this);
}
PathMatchResult matchPath(std::string_view path, std::string_view match_to)


@ -285,6 +285,31 @@ struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
size_t bytesSize() const override { return RemoveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
};
struct ZooKeeperRemoveRecursiveRequest final : RemoveRecursiveRequest, ZooKeeperRequest
{
ZooKeeperRemoveRecursiveRequest() = default;
explicit ZooKeeperRemoveRecursiveRequest(const RemoveRecursiveRequest & base) : RemoveRecursiveRequest(base) {}
OpNum getOpNum() const override { return OpNum::RemoveRecursive; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
size_t bytesSize() const override { return RemoveRecursiveRequest::bytesSize() + sizeof(xid); }
};
struct ZooKeeperRemoveRecursiveResponse : RemoveRecursiveResponse, ZooKeeperResponse
{
void readImpl(ReadBuffer &) override {}
void writeImpl(WriteBuffer &) const override {}
OpNum getOpNum() const override { return OpNum::RemoveRecursive; }
size_t bytesSize() const override { return RemoveRecursiveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
};
struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
{
ZooKeeperExistsRequest() = default;


@ -29,6 +29,7 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
static_cast<int32_t>(OpNum::GetACL),
static_cast<int32_t>(OpNum::FilteredList),
static_cast<int32_t>(OpNum::CheckNotExists),
static_cast<int32_t>(OpNum::RemoveRecursive),
};
OpNum getOpNum(int32_t raw_op_num)


@ -40,6 +40,7 @@ enum class OpNum : int32_t
FilteredList = 500,
CheckNotExists = 501,
CreateIfNotExists = 502,
RemoveRecursive = 503,
SessionID = 997, /// Special internal request
};


@ -1347,6 +1347,25 @@ void ZooKeeper::remove(
ProfileEvents::increment(ProfileEvents::ZooKeeperRemove);
}
void ZooKeeper::removeRecursive(
const String & path,
uint32_t remove_nodes_limit,
RemoveRecursiveCallback callback)
{
if (!isFeatureEnabled(KeeperFeatureFlag::REMOVE_RECURSIVE))
throw Exception::fromMessage(Error::ZBADARGUMENTS, "RemoveRecursive request type cannot be used because it's not supported by the server");
ZooKeeperRemoveRecursiveRequest request;
request.path = path;
request.remove_nodes_limit = remove_nodes_limit;
RequestInfo request_info;
request_info.request = std::make_shared<ZooKeeperRemoveRecursiveRequest>(std::move(request));
request_info.callback = [callback](const Response & response) { callback(dynamic_cast<const RemoveRecursiveResponse &>(response)); };
pushRequest(std::move(request_info));
ProfileEvents::increment(ProfileEvents::ZooKeeperRemove);
}
void ZooKeeper::exists(
const String & path,


@ -146,6 +146,11 @@ public:
int32_t version,
RemoveCallback callback) override;
void removeRecursive(
const String & path,
uint32_t remove_nodes_limit,
RemoveRecursiveCallback callback) override;
void exists(
const String & path,
ExistsCallback callback,


@ -57,11 +57,13 @@ namespace ErrorCodes
namespace DB
{
static bool supportsAtomicRenameImpl()
static std::optional<std::string> supportsAtomicRenameImpl()
{
VersionNumber renameat2_minimal_version(3, 15, 0);
VersionNumber linux_version(Poco::Environment::osVersion());
return linux_version >= renameat2_minimal_version;
if (linux_version >= renameat2_minimal_version)
return std::nullopt;
return fmt::format("Linux kernel 3.15+ is required, got {}", linux_version.toString());
}
static bool renameat2(const std::string & old_path, const std::string & new_path, int flags)
@ -97,10 +99,14 @@ static bool renameat2(const std::string & old_path, const std::string & new_path
ErrnoException::throwFromPath(ErrorCodes::SYSTEM_ERROR, new_path, "Cannot rename {} to {}", old_path, new_path);
}
bool supportsAtomicRename()
bool supportsAtomicRename(std::string * out_message)
{
static bool supports = supportsAtomicRenameImpl();
return supports;
static auto error = supportsAtomicRenameImpl();
if (!error.has_value())
return true;
if (out_message)
*out_message = error.value();
return false;
}
}
@ -152,16 +158,22 @@ static bool renameat2(const std::string & old_path, const std::string & new_path
}
static bool supportsAtomicRenameImpl()
static std::optional<std::string> supportsAtomicRenameImpl()
{
auto fun = dlsym(RTLD_DEFAULT, "renamex_np");
return fun != nullptr;
if (fun != nullptr)
return std::nullopt;
return "macOS 10.12 or later is required";
}
bool supportsAtomicRename()
bool supportsAtomicRename(std::string * out_message)
{
static bool supports = supportsAtomicRenameImpl();
return supports;
static auto error = supportsAtomicRenameImpl();
if (!error.has_value())
return true;
if (out_message)
*out_message = error.value();
return false;
}
}
@ -179,8 +191,10 @@ static bool renameat2(const std::string &, const std::string &, int)
return false;
}
bool supportsAtomicRename()
bool supportsAtomicRename(std::string * out_message)
{
if (out_message)
*out_message = "only Linux and macOS are supported";
return false;
}


@ -6,7 +6,7 @@ namespace DB
{
/// Returns true if the following functions are supported by the system
bool supportsAtomicRename();
bool supportsAtomicRename(std::string * out_message = nullptr);
/// Atomically rename old_path to new_path. If new_path exists, do not overwrite it and throw exception
void renameNoReplace(const std::string & old_path, const std::string & new_path);
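
A sketch of how a caller might use the extended signature to report why atomic rename is unavailable; the surrounding exception code is a hypothetical example, not taken from this change:

std::string reason;
if (!DB::supportsAtomicRename(&reason))
{
    /// `reason` now holds e.g. "Linux kernel 3.15+ is required, got 3.10.0"
    throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED,
        "Cannot use atomic rename: {}", reason);
}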


@ -6,7 +6,7 @@
#include <filesystem>
#include <IO/WriteBufferFromFile.h>
#include <Common/CgroupsMemoryUsageObserver.h>
#include <Common/MemoryWorker.h>
#include <Common/filesystemHelpers.h>
using namespace DB;
@ -126,7 +126,7 @@ const std::string EXPECTED[2]
"\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"};
class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<CgroupsMemoryUsageObserver::CgroupsVersion>
class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<ICgroupsReader::CgroupsVersion>
{
void SetUp() override
{
@ -138,7 +138,7 @@ class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<Cgroup
stat_file.write(SAMPLE_FILE[version].data(), SAMPLE_FILE[version].size());
stat_file.sync();
if (GetParam() == CgroupsMemoryUsageObserver::CgroupsVersion::V2)
if (GetParam() == ICgroupsReader::CgroupsVersion::V2)
{
auto current_file = WriteBufferFromFile(tmp_dir + "/memory.current");
current_file.write("29645422592", 11);
@ -154,18 +154,18 @@ protected:
TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest)
{
const auto version = GetParam();
auto reader = createCgroupsReader(version, tmp_dir);
auto reader = ICgroupsReader::createCgroupsReader(version, tmp_dir);
ASSERT_EQ(
reader->readMemoryUsage(),
version == CgroupsMemoryUsageObserver::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184
: /* value from memory.current - inactive_file */ 20952338432);
version == ICgroupsReader::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184
: /* anon from memory.stat */ 10429399040);
}
TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest)
{
const auto version = GetParam();
auto reader = createCgroupsReader(version, tmp_dir);
auto reader = ICgroupsReader::createCgroupsReader(version, tmp_dir);
ASSERT_EQ(reader->dumpAllStats(), EXPECTED[static_cast<uint8_t>(version)]);
}
@ -173,6 +173,6 @@ TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest)
INSTANTIATE_TEST_SUITE_P(
CgroupsMemoryUsageObserverTests,
CgroupsMemoryUsageObserverFixture,
::testing::Values(CgroupsMemoryUsageObserver::CgroupsVersion::V1, CgroupsMemoryUsageObserver::CgroupsVersion::V2));
::testing::Values(ICgroupsReader::CgroupsVersion::V1, ICgroupsReader::CgroupsVersion::V2));
#endif


@ -39,7 +39,7 @@ using Checksum = CityHash_v1_0_2::uint128;
/// Validate checksum of data, and if it mismatches, find out possible reason and throw exception.
static void validateChecksum(char * data, size_t size, const Checksum expected_checksum)
static void validateChecksum(char * data, size_t size, const Checksum expected_checksum, bool external_data)
{
auto calculated_checksum = CityHash_v1_0_2::CityHash128(data, size);
if (expected_checksum == calculated_checksum)
@ -64,6 +64,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
"this can be caused by disk bit rot. This exception protects ClickHouse "
"from data corruption due to hardware failures.";
int error_code = external_data ? ErrorCodes::CANNOT_DECOMPRESS : ErrorCodes::CHECKSUM_DOESNT_MATCH;
auto flip_bit = [](char * buf, size_t pos)
{
buf[pos / 8] ^= 1 << pos % 8;
@ -87,7 +89,7 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
{
message << ". The mismatch is caused by single bit flip in data block at byte " << (bit_pos / 8) << ", bit " << (bit_pos % 8) << ". "
<< message_hardware_failure;
throw Exception::createDeprecated(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH);
throw Exception::createDeprecated(message.str(), error_code);
}
flip_bit(tmp_data, bit_pos); /// Restore
@ -102,10 +104,10 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
{
message << ". The mismatch is caused by single bit flip in checksum. "
<< message_hardware_failure;
throw Exception::createDeprecated(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH);
throw Exception::createDeprecated(message.str(), error_code);
}
throw Exception::createDeprecated(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH);
throw Exception::createDeprecated(message.str(), error_code);
}
static void readHeaderAndGetCodecAndSize(
@ -151,7 +153,7 @@ static void readHeaderAndGetCodecAndSize(
"Most likely corrupted data.", size_compressed_without_checksum);
if (size_compressed_without_checksum < header_size)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: "
throw Exception(external_data ? ErrorCodes::CANNOT_DECOMPRESS : ErrorCodes::CORRUPTED_DATA, "Can't decompress data: "
"the compressed data size ({}, this should include header size) is less than the header size ({})",
size_compressed_without_checksum, static_cast<size_t>(header_size));
}
@ -202,7 +204,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
readBinaryLittleEndian(checksum.low64, checksum_in);
readBinaryLittleEndian(checksum.high64, checksum_in);
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum, external_data);
}
ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum));
@ -247,7 +249,7 @@ size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t &
readBinaryLittleEndian(checksum.low64, checksum_in);
readBinaryLittleEndian(checksum.high64, checksum_in);
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum, external_data);
}
ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum));
@ -307,7 +309,7 @@ void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_d
UInt8 header_size = ICompressionCodec::getHeaderSize();
if (size_compressed_without_checksum < header_size)
throw Exception(ErrorCodes::CORRUPTED_DATA,
throw Exception(external_data ? ErrorCodes::CANNOT_DECOMPRESS : ErrorCodes::CORRUPTED_DATA,
"Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})",
size_compressed_without_checksum, static_cast<size_t>(header_size));


@ -55,10 +55,29 @@ void CompressedWriteBuffer::nextImpl()
out.write(compressed_buffer.data(), compressed_size);
}
/// Increase the buffer size for the next data if the adaptive buffer size is used and nextImpl was called because the buffer was full.
if (!available() && use_adaptive_buffer_size && memory.size() < adaptive_buffer_max_size)
{
memory.resize(std::min(memory.size() * 2, adaptive_buffer_max_size));
BufferBase::set(memory.data(), memory.size(), 0);
}
}
CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size)
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), codec(std::move(codec_))
void CompressedWriteBuffer::finalizeImpl()
{
/// Don't try to resize buffer in nextImpl.
use_adaptive_buffer_size = false;
next();
}
CompressedWriteBuffer::CompressedWriteBuffer(
WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size, bool use_adaptive_buffer_size_, size_t adaptive_buffer_initial_size)
: BufferWithOwnMemory<WriteBuffer>(use_adaptive_buffer_size_ ? adaptive_buffer_initial_size : buf_size)
, out(out_)
, codec(std::move(codec_))
, use_adaptive_buffer_size(use_adaptive_buffer_size_)
, adaptive_buffer_max_size(buf_size)
{
}


@ -19,7 +19,9 @@ public:
explicit CompressedWriteBuffer(
WriteBuffer & out_,
CompressionCodecPtr codec_ = CompressionCodecFactory::instance().getDefaultCodec(),
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
bool use_adaptive_buffer_size_ = false,
size_t adaptive_buffer_initial_size = DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE);
~CompressedWriteBuffer() override;
@ -45,10 +47,17 @@ public:
private:
void nextImpl() override;
void finalizeImpl() override;
WriteBuffer & out;
CompressionCodecPtr codec;
/// If true, the size of the internal buffer will be exponentially increased up to
/// adaptive_buffer_max_size after each nextImpl call. It can be used to avoid
/// a large buffer allocation when the actual size of the written data is small.
bool use_adaptive_buffer_size;
size_t adaptive_buffer_max_size;
PODArray<char> compressed_buffer;
};
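
The growth policy described above can be shown in isolation; this is a standalone sketch of the doubling behavior under assumed sizes, not the buffer class itself:

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    /// Mirrors the resize in CompressedWriteBuffer::nextImpl: double the buffer
    /// after each flush until the configured maximum (buf_size) is reached.
    size_t adaptive_buffer_max_size = 1048576; /// DBMS_DEFAULT_BUFFER_SIZE, illustrative
    size_t size = 16384;                       /// hypothetical initial adaptive size
    while (size < adaptive_buffer_max_size)
    {
        size = std::min(size * 2, adaptive_buffer_max_size);
        std::cout << size << '\n';             /// 32768, 65536, ..., 1048576
    }
}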


@ -114,8 +114,13 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM
}
KeeperAsynchronousMetrics::KeeperAsynchronousMetrics(
ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_)
: AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_))
ContextPtr context_,
unsigned update_period_seconds,
const ProtocolServerMetricsFunc & protocol_server_metrics_func_,
bool update_jemalloc_epoch_,
bool update_rss_)
: AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, update_jemalloc_epoch_, update_rss_)
, context(std::move(context_))
{
}


@ -13,9 +13,13 @@ class KeeperAsynchronousMetrics : public AsynchronousMetrics
{
public:
KeeperAsynchronousMetrics(
ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_);
~KeeperAsynchronousMetrics() override;
ContextPtr context_,
unsigned update_period_seconds,
const ProtocolServerMetricsFunc & protocol_server_metrics_func_,
bool update_jemalloc_epoch_,
bool update_rss_);
~KeeperAsynchronousMetrics() override;
private:
ContextPtr context;


@ -11,6 +11,7 @@ enum class KeeperApiVersion : uint8_t
WITH_FILTERED_LIST,
WITH_MULTI_READ,
WITH_CHECK_NOT_EXISTS,
WITH_REMOVE_RECURSIVE,
};
const String keeper_system_path = "/keeper";


@ -91,6 +91,12 @@ bool checkIfRequestIncreaseMem(const Coordination::ZooKeeperRequestPtr & request
memory_delta -= remove_req.bytesSize();
break;
}
case Coordination::OpNum::RemoveRecursive:
{
Coordination::ZooKeeperRemoveRecursiveRequest & remove_req = dynamic_cast<Coordination::ZooKeeperRemoveRecursiveRequest &>(*sub_zk_request);
memory_delta -= remove_req.bytesSize();
break;
}
default:
break;
}
@ -148,7 +154,14 @@ void KeeperDispatcher::requestThread()
Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit();
if (configuration_and_settings->standalone_keeper && isExceedingMemorySoftLimit() && checkIfRequestIncreaseMem(request.request))
{
LOG_WARNING(log, "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type is {}", ReadableSize(mem_soft_limit), ReadableSize(total_memory_tracker.get()), request.request->getOpNum());
LOG_WARNING(
log,
"Processing requests refused because of max_memory_usage_soft_limit {}, the total allocated memory is {}, RSS is {}, request type "
"is {}",
ReadableSize(mem_soft_limit),
ReadableSize(total_memory_tracker.get()),
ReadableSize(total_memory_tracker.getRSS()),
request.request->getOpNum());
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
continue;
}


@ -12,6 +12,7 @@ enum class KeeperFeatureFlag : size_t
MULTI_READ,
CHECK_NOT_EXISTS,
CREATE_IF_NOT_EXISTS,
REMOVE_RECURSIVE,
};
class KeeperFeatureFlags


@ -35,6 +35,7 @@
# include <Poco/Net/Context.h>
# include <Poco/Net/SSLManager.h>
# include <Poco/Net/Utility.h>
# include <Poco/StringTokenizer.h>
#endif
#include <chrono>
@ -107,15 +108,28 @@ void setSSLParams(nuraft::asio_service::options & asio_opts)
params.loadDefaultCAs = config.getBool("openSSL.server.loadDefaultCAFile", false);
params.verificationMode = Poco::Net::Utility::convertVerificationMode(config.getString("openSSL.server.verificationMode", "none"));
asio_opts.ssl_context_provider_server_ = [ctx_params = params, certificate_data]
std::string disabled_protocols_list = config.getString("openSSL.server.disableProtocols", "");
Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
int disabled_protocols = 0;
for (const auto & token : dp_tok)
{
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_SERVER_USE, ctx_params);
if (token == "sslv2")
disabled_protocols |= Poco::Net::Context::PROTO_SSLV2;
else if (token == "sslv3")
disabled_protocols |= Poco::Net::Context::PROTO_SSLV3;
else if (token == "tlsv1")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1;
else if (token == "tlsv1_1")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_1;
else if (token == "tlsv1_2")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_2;
}
asio_opts.ssl_context_provider_server_ = [params, certificate_data, disabled_protocols]
{
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_SERVER_USE, params);
context.disableProtocols(disabled_protocols);
SSL_CTX * ssl_ctx = context.takeSslContext();
uint64_t options = 0;
options |= SSL_OP_ALL;
options |= SSL_OP_NO_SSLv2;
options |= SSL_OP_SINGLE_DH_USE;
SSL_CTX_set_options(ssl_ctx, options);
SSL_CTX_set_cert_cb(ssl_ctx, callSetCertificate, reinterpret_cast<void *>(certificate_data.get()));
return ssl_ctx;
};
@ -646,7 +660,7 @@ bool KeeperServer::isLeaderAlive() const
bool KeeperServer::isExceedingMemorySoftLimit() const
{
Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit();
return mem_soft_limit > 0 && total_memory_tracker.get() >= mem_soft_limit;
return mem_soft_limit > 0 && std::max(total_memory_tracker.get(), total_memory_tracker.getRSS()) >= mem_soft_limit;
}
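
With this change the soft-limit check trips on whichever counter is larger; a small worked example with illustrative values:

#include <algorithm>
#include <cstdint>
/// Hypothetical values: allocated tracking lags, but RSS already exceeds the limit.
int64_t soft_limit = 8LL << 30; /// 8 GiB
int64_t allocated  = 6LL << 30; /// total_memory_tracker.get()
int64_t rss        = 9LL << 30; /// total_memory_tracker.getRSS()
bool exceeding = soft_limit > 0 && std::max(allocated, rss) >= soft_limit; /// true
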
/// TODO test whether taking failed peer in count


@ -832,6 +832,15 @@ std::shared_ptr<typename Container::Node> KeeperStorage<Container>::UncommittedS
return tryGetNodeFromStorage(path);
}
template<typename Container>
const typename Container::Node * KeeperStorage<Container>::UncommittedState::getActualNodeView(StringRef path, const Node & storage_node) const
{
if (auto node_it = nodes.find(path.toView()); node_it != nodes.end())
return node_it->second.node.get();
return &storage_node;
}
template<typename Container>
Coordination::ACLs KeeperStorage<Container>::UncommittedState::getACLs(StringRef path) const
{
@ -1124,7 +1133,7 @@ struct KeeperStorageRequestProcessor
}
virtual KeeperStorageBase::ResponsesForSessions
processWatches(KeeperStorageBase::Watches & /*watches*/, KeeperStorageBase::Watches & /*list_watches*/) const
processWatches(const Storage & /*storage*/, int64_t /*zxid*/, KeeperStorageBase::Watches & /*watches*/, KeeperStorageBase::Watches & /*list_watches*/) const
{
return {};
}
@ -1241,7 +1250,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
using KeeperStorageRequestProcessor<Storage>::KeeperStorageRequestProcessor;
KeeperStorageBase::ResponsesForSessions
processWatches(KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override
processWatches(const Storage & /*storage*/, int64_t /*zxid*/, KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override
{
return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED);
}
@ -1462,16 +1471,41 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce
}
};
namespace
{
template <typename Storage>
void addUpdateParentPzxidDelta(Storage & storage, std::vector<typename Storage::Delta> & deltas, int64_t zxid, StringRef path)
{
auto parent_path = parentNodePath(path);
if (!storage.uncommitted_state.getNode(parent_path))
return;
deltas.emplace_back(
std::string{parent_path},
zxid,
typename Storage::UpdateNodeDelta
{
[zxid](Storage::Node & parent)
{
parent.pzxid = std::max(parent.pzxid, zxid);
}
}
);
}
}
template<typename Storage>
struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor<Storage>
{
using KeeperStorageRequestProcessor<Storage>::KeeperStorageRequestProcessor;
bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override
{
return storage.checkACL(parentNodePath(this->zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local);
}
using KeeperStorageRequestProcessor<Storage>::KeeperStorageRequestProcessor;
std::vector<typename Storage::Delta>
preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override
{
@ -1488,31 +1522,12 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}};
}
const auto update_parent_pzxid = [&]()
{
auto parent_path = parentNodePath(request.path);
if (!storage.uncommitted_state.getNode(parent_path))
return;
new_deltas.emplace_back(
std::string{parent_path},
zxid,
typename Storage::UpdateNodeDelta
{
[zxid](Storage::Node & parent)
{
parent.pzxid = std::max(parent.pzxid, zxid);
}
}
);
};
auto node = storage.uncommitted_state.getNode(request.path);
if (!node)
{
if (request.restored_from_zookeeper_log)
update_parent_pzxid();
addUpdateParentPzxidDelta(storage, new_deltas, zxid, request.path);
return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}};
}
else if (request.version != -1 && request.version != node->version)
@ -1521,7 +1536,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
return {typename Storage::Delta{zxid, Coordination::Error::ZNOTEMPTY}};
if (request.restored_from_zookeeper_log)
update_parent_pzxid();
addUpdateParentPzxidDelta(storage, new_deltas, zxid, request.path);
new_deltas.emplace_back(
std::string{parentNodePath(request.path)},
@ -1552,12 +1567,318 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
}
KeeperStorageBase::ResponsesForSessions
processWatches(KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override
processWatches(const Storage & /*storage*/, int64_t /*zxid*/, KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override
{
return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED);
}
};
template<typename Storage>
struct KeeperStorageRemoveRecursiveRequestProcessor final : public KeeperStorageRequestProcessor<Storage>
{
using KeeperStorageRequestProcessor<Storage>::KeeperStorageRequestProcessor;
bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override
{
return storage.checkACL(parentNodePath(this->zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local);
}
std::vector<typename Storage::Delta>
preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override
{
ProfileEvents::increment(ProfileEvents::KeeperRemoveRequest);
Coordination::ZooKeeperRemoveRecursiveRequest & request = dynamic_cast<Coordination::ZooKeeperRemoveRecursiveRequest &>(*this->zk_request);
std::vector<typename Storage::Delta> new_deltas;
if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH)
{
auto error_msg = fmt::format("Trying to delete an internal Keeper path ({}) which is not allowed", request.path);
handleSystemNodeModification(keeper_context, error_msg);
return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}};
}
auto node = storage.uncommitted_state.getNode(request.path);
if (!node)
{
if (request.restored_from_zookeeper_log)
addUpdateParentPzxidDelta(storage, new_deltas, zxid, request.path);
return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}};
}
ToDeleteTreeCollector collector(storage, zxid, session_id, request.remove_nodes_limit);
auto collect_status = collector.collect(request.path, *node);
if (collect_status == ToDeleteTreeCollector::CollectStatus::NoAuth)
return {typename Storage::Delta{zxid, Coordination::Error::ZNOAUTH}};
if (collect_status == ToDeleteTreeCollector::CollectStatus::LimitExceeded)
return {typename Storage::Delta{zxid, Coordination::Error::ZNOTEMPTY}};
if (request.restored_from_zookeeper_log)
addUpdateParentPzxidDelta(storage, new_deltas, zxid, request.path);
auto delete_deltas = collector.extractDeltas();
for (const auto & delta : delete_deltas)
{
const auto * remove_delta = std::get_if<typename Storage::RemoveNodeDelta>(&delta.operation);
if (remove_delta && remove_delta->ephemeral_owner)
storage.unregisterEphemeralPath(remove_delta->ephemeral_owner, delta.path);
}
new_deltas.insert(new_deltas.end(), std::make_move_iterator(delete_deltas.begin()), std::make_move_iterator(delete_deltas.end()));
digest = storage.calculateNodesDigest(digest, new_deltas);
return new_deltas;
}
Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse();
Coordination::ZooKeeperRemoveRecursiveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveRecursiveResponse &>(*response_ptr);
response.error = storage.commit(zxid);
return response_ptr;
}
KeeperStorageBase::ResponsesForSessions
processWatches(const Storage & storage, int64_t zxid, KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override
{
/// We need to iterate over the deltas of this zxid and update watches for the whole deleted subtree.
const auto & deltas = storage.uncommitted_state.deltas;
KeeperStorageBase::ResponsesForSessions responses;
for (auto it = deltas.rbegin(); it != deltas.rend() && it->zxid == zxid; ++it)
{
const auto * remove_delta = std::get_if<typename Storage::RemoveNodeDelta>(&it->operation);
if (remove_delta)
{
auto new_responses = processWatchesImpl(it->path, watches, list_watches, Coordination::Event::DELETED);
responses.insert(responses.end(), std::make_move_iterator(new_responses.begin()), std::make_move_iterator(new_responses.end()));
}
}
return responses;
}
private:
using SNode = typename Storage::Node;
class ToDeleteTreeCollector
{
Storage & storage;
int64_t zxid;
int64_t session_id;
uint32_t limit;
uint32_t max_level = 0;
uint32_t nodes_observed = 1; /// root node
std::unordered_map<uint32_t, std::vector<typename Storage::Delta>> by_level_deltas;
struct Step
{
String path;
std::variant<SNode, const SNode *> node;
uint32_t level;
};
enum class CollectStatus
{
Ok,
NoAuth,
LimitExceeded,
};
friend struct KeeperStorageRemoveRecursiveRequestProcessor;
public:
ToDeleteTreeCollector(Storage & storage_, int64_t zxid_, int64_t session_id_, uint32_t limit_)
: storage(storage_)
, zxid(zxid_)
, session_id(session_id_)
, limit(limit_)
{
}
CollectStatus collect(StringRef root_path, const SNode & root_node)
{
std::deque<Step> steps;
if (checkLimits(&root_node))
return CollectStatus::LimitExceeded;
steps.push_back(Step{root_path.toString(), &root_node, 0});
while (!steps.empty())
{
Step step = std::move(steps.front());
steps.pop_front();
StringRef path = step.path;
uint32_t level = step.level;
const SNode * node_ptr = nullptr;
/// RocksDB steps own their node (SNode), while in-memory steps only reference it (const SNode *).
if (auto * rdb = std::get_if<SNode>(&step.node))
    node_ptr = rdb;
else
node_ptr = std::get<const SNode *>(step.node);
chassert(!path.empty());
chassert(node_ptr != nullptr);
const auto & node = *node_ptr;
auto actual_node_ptr = storage.uncommitted_state.getActualNodeView(path, node);
chassert(actual_node_ptr != nullptr); /// explicitly check that node is not deleted
if (actual_node_ptr->numChildren() > 0 && !storage.checkACL(path, Coordination::ACL::Delete, session_id, /*is_local=*/false))
return CollectStatus::NoAuth;
if (auto status = visitRocksDBNode(steps, path, level); status != CollectStatus::Ok)
return status;
if (auto status = visitMemNode(steps, path, level); status != CollectStatus::Ok)
return status;
if (auto status = visitRootAndUncommitted(steps, path, node, level); status != CollectStatus::Ok)
return status;
}
return CollectStatus::Ok;
}
std::vector<typename Storage::Delta> extractDeltas()
{
std::vector<typename Storage::Delta> deltas;
for (ssize_t level = max_level; level >= 0; --level)
{
auto & level_deltas = by_level_deltas[static_cast<uint32_t>(level)];
deltas.insert(deltas.end(), std::make_move_iterator(level_deltas.begin()), std::make_move_iterator(level_deltas.end()));
}
return std::move(deltas);
}
private:
CollectStatus visitRocksDBNode(std::deque<Step> & steps, StringRef root_path, uint32_t level)
{
if constexpr (Storage::use_rocksdb)
{
std::filesystem::path root_fs_path(root_path.toString());
auto children = storage.container.getChildren(root_path.toString());
for (auto && [child_name, child_node] : children)
{
auto child_path = (root_fs_path / child_name).generic_string();
const auto actual_child_node_ptr = storage.uncommitted_state.getActualNodeView(child_path, child_node);
if (actual_child_node_ptr == nullptr) /// node was deleted in a previous step of a multi transaction
continue;
if (checkLimits(actual_child_node_ptr))
return CollectStatus::LimitExceeded;
steps.push_back(Step{std::move(child_path), std::move(child_node), level + 1});
}
}
return CollectStatus::Ok;
}
CollectStatus visitMemNode(std::deque<Step> & steps, StringRef root_path, uint32_t level)
{
if constexpr (!Storage::use_rocksdb)
{
auto node_it = storage.container.find(root_path);
if (node_it == storage.container.end())
return CollectStatus::Ok;
std::filesystem::path root_fs_path(root_path.toString());
const auto & children = node_it->value.getChildren();
for (const auto & child_name : children)
{
auto child_path = (root_fs_path / child_name.toView()).generic_string();
auto child_it = storage.container.find(child_path);
chassert(child_it != storage.container.end());
const auto & child_node = child_it->value;
const auto actual_child_node_ptr = storage.uncommitted_state.getActualNodeView(child_path, child_node);
if (actual_child_node_ptr == nullptr) /// node was deleted in a previous step of a multi transaction
continue;
if (checkLimits(actual_child_node_ptr))
return CollectStatus::LimitExceeded;
steps.push_back(Step{std::move(child_path), &child_node, level + 1});
}
}
return CollectStatus::Ok;
}
CollectStatus visitRootAndUncommitted(std::deque<Step> & steps, StringRef root_path, const SNode & root_node, uint32_t level)
{
const auto & nodes = storage.uncommitted_state.nodes;
/// nodes are sorted by path with level locality: shorter (parent) paths always come before longer (descendant) ones
auto it = nodes.upper_bound(root_path.toString() + "/");
for (; it != nodes.end() && parentNodePath(it->first) == root_path; ++it)
{
const auto actual_child_node_ptr = it->second.node.get();
if (actual_child_node_ptr == nullptr) /// node was deleted in a previous step of a multi transaction
continue;
if (checkLimits(actual_child_node_ptr))
return CollectStatus::LimitExceeded;
const String & child_path = it->first;
const SNode & child_node = *it->second.node;
steps.push_back(Step{child_path, &child_node, level + 1});
}
addDelta(root_path, root_node, level);
return CollectStatus::Ok;
}
void addDelta(StringRef root_path, const SNode & root_node, uint32_t level)
{
max_level = std::max(max_level, level);
by_level_deltas[level].emplace_back(
parentNodePath(root_path).toString(),
zxid,
typename Storage::UpdateNodeDelta{
[](SNode & parent)
{
++parent.cversion;
parent.decreaseNumChildren();
}
});
by_level_deltas[level].emplace_back(root_path.toString(), zxid, typename Storage::RemoveNodeDelta{root_node.version, root_node.ephemeralOwner()});
}
bool checkLimits(const SNode * node)
{
chassert(node != nullptr);
nodes_observed += node->numChildren();
return nodes_observed > limit;
}
};
};
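// A standalone sketch (not the Keeper types above) of the two-phase removal
// that ToDeleteTreeCollector implements: a breadth-first walk records every
// node level by level, and extractDeltas() then emits the removals from the
// deepest level upwards, so every child is deleted before its parent.
#include <cstdint>
#include <deque>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
    // Hypothetical subtree: parent path -> child names.
    std::map<std::string, std::vector<std::string>> tree = {
        {"/A", {"B", "C"}},
        {"/A/B", {"D"}},
    };

    struct Step { std::string path; uint32_t level; };
    std::deque<Step> steps{{"/A", 0}};
    std::map<uint32_t, std::vector<std::string>> by_level;

    while (!steps.empty())
    {
        Step step = std::move(steps.front());
        steps.pop_front();
        by_level[step.level].push_back(step.path); // the "delta" for this node
        for (const auto & child : tree[step.path])
            steps.push_back({step.path + "/" + child, step.level + 1});
    }

    // Deepest level first: prints /A/B/D, /A/B, /A/C, /A.
    for (auto it = by_level.rbegin(); it != by_level.rend(); ++it)
        for (const auto & path : it->second)
            std::cout << "remove " << path << '\n';
}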
template<typename Storage>
struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor<Storage>
{
@ -1709,7 +2030,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce
}
KeeperStorageBase::ResponsesForSessions
processWatches(typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override
processWatches(const Storage & /*storage*/, int64_t /*zxid*/, typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override
{
return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
}
@ -2131,6 +2452,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro
check_operation_type(OperationType::Write);
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRequestProcessor<Storage>>(sub_zk_request));
break;
case Coordination::OpNum::RemoveRecursive:
check_operation_type(OperationType::Write);
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRecursiveRequestProcessor<Storage>>(sub_zk_request));
break;
case Coordination::OpNum::Set:
check_operation_type(OperationType::Write);
concrete_requests.push_back(std::make_shared<KeeperStorageSetRequestProcessor<Storage>>(sub_zk_request));
@ -2250,12 +2575,12 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro
}
KeeperStorageBase::ResponsesForSessions
processWatches(typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override
processWatches(const Storage & storage, int64_t zxid, typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override
{
typename Storage::ResponsesForSessions result;
for (const auto & generic_request : concrete_requests)
{
auto responses = generic_request->processWatches(watches, list_watches);
auto responses = generic_request->processWatches(storage, zxid, watches, list_watches);
result.insert(result.end(), responses.begin(), responses.end());
}
return result;
@ -2400,6 +2725,7 @@ KeeperStorageRequestProcessorsFactory<Storage>::KeeperStorageRequestProcessorsFa
registerKeeperRequestProcessor<Coordination::OpNum::SetACL, KeeperStorageSetACLRequestProcessor<Storage>>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::GetACL, KeeperStorageGetACLRequestProcessor<Storage>>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::CheckNotExists, KeeperStorageCheckRequestProcessor<Storage>>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::RemoveRecursive, KeeperStorageRemoveRecursiveRequestProcessor<Storage>>(*this);
}
@ -2718,7 +3044,7 @@ KeeperStorage<Container>::ResponsesForSessions KeeperStorage<Container>::process
/// If this request was processed successfully, we need to check watches
if (response->error == Coordination::Error::ZOK)
{
auto watch_responses = request_processor->processWatches(watches, list_watches);
auto watch_responses = request_processor->processWatches(*this, zxid, watches, list_watches);
results.insert(results.end(), watch_responses.begin(), watch_responses.end());
}

View File

@ -566,6 +566,7 @@ public:
void rollback(int64_t rollback_zxid);
std::shared_ptr<Node> getNode(StringRef path) const;
const Node * getActualNodeView(StringRef path, const Node & storage_node) const;
Coordination::ACLs getACLs(StringRef path) const;
void applyDelta(const Delta & delta);
@ -609,7 +610,18 @@ public:
using is_transparent = void; // required to make find() work with a type different from key_type
};
mutable std::unordered_map<std::string, UncommittedNode, Hash, Equal> nodes;
struct PathCmp
{
using is_transparent = std::true_type;
auto operator()(const std::string_view a,
const std::string_view b) const
{
return a.size() < b.size() || (a.size() == b.size() && a < b);
}
};
mutable std::map<std::string, UncommittedNode, PathCmp> nodes;
std::unordered_map<std::string, std::list<const Delta *>, Hash, Equal> deltas_for_path;
std::list<Delta> deltas;
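// A standalone sketch of the shortlex ordering PathCmp imposes above: shorter
// paths always sort first, so a parent precedes all of its descendants, and
// same-length siblings stay adjacent. This is the "level locality" that
// visitRootAndUncommitted relies on when scanning uncommitted nodes.
#include <iostream>
#include <map>
#include <string>
#include <string_view>
#include <type_traits>

struct PathCmp
{
    using is_transparent = std::true_type;
    bool operator()(std::string_view a, std::string_view b) const
    {
        return a.size() < b.size() || (a.size() == b.size() && a < b);
    }
};

int main()
{
    std::map<std::string, int, PathCmp> nodes;
    for (const auto * path : {"/A/B/D", "/A", "/A/C", "/A/B"})
        nodes.emplace(path, 0);

    // Iteration order: /A, /A/B, /A/C, /A/B/D.
    for (const auto & [path, value] : nodes)
        std::cout << path << '\n';
}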

View File

@ -3113,6 +3113,8 @@ TYPED_TEST(CoordinationTest, TestFeatureFlags)
ASSERT_TRUE(feature_flags.isEnabled(KeeperFeatureFlag::FILTERED_LIST));
ASSERT_TRUE(feature_flags.isEnabled(KeeperFeatureFlag::MULTI_READ));
ASSERT_FALSE(feature_flags.isEnabled(KeeperFeatureFlag::CHECK_NOT_EXISTS));
ASSERT_FALSE(feature_flags.isEnabled(KeeperFeatureFlag::CREATE_IF_NOT_EXISTS));
ASSERT_FALSE(feature_flags.isEnabled(KeeperFeatureFlag::REMOVE_RECURSIVE));
}
TYPED_TEST(CoordinationTest, TestSystemNodeModify)
@ -3374,6 +3376,474 @@ TYPED_TEST(CoordinationTest, TestReapplyingDeltas)
ASSERT_TRUE(children1_set == children2_set);
}
TYPED_TEST(CoordinationTest, TestRemoveRecursiveRequest)
{
using namespace DB;
using namespace Coordination;
using Storage = typename TestFixture::Storage;
ChangelogDirTest rocks("./rocksdb");
this->setRocksDBDirectory("./rocksdb");
Storage storage{500, "", this->keeper_context};
int32_t zxid = 0;
const auto create = [&](const String & path, int create_mode)
{
int new_zxid = ++zxid;
const auto create_request = std::make_shared<ZooKeeperCreateRequest>();
create_request->path = path;
create_request->is_ephemeral = create_mode == zkutil::CreateMode::Ephemeral || create_mode == zkutil::CreateMode::EphemeralSequential;
create_request->is_sequential = create_mode == zkutil::CreateMode::PersistentSequential || create_mode == zkutil::CreateMode::EphemeralSequential;
storage.preprocessRequest(create_request, 1, 0, new_zxid);
auto responses = storage.processRequest(create_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK) << "Failed to create " << path;
};
const auto remove = [&](const String & path, int32_t version = -1)
{
int new_zxid = ++zxid;
auto remove_request = std::make_shared<ZooKeeperRemoveRequest>();
remove_request->path = path;
remove_request->version = version;
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
return storage.processRequest(remove_request, 1, new_zxid);
};
const auto remove_recursive = [&](const String & path, uint32_t remove_nodes_limit = 1)
{
int new_zxid = ++zxid;
auto remove_request = std::make_shared<ZooKeeperRemoveRecursiveRequest>();
remove_request->path = path;
remove_request->remove_nodes_limit = remove_nodes_limit;
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
return storage.processRequest(remove_request, 1, new_zxid);
};
const auto exists = [&](const String & path)
{
int new_zxid = ++zxid;
const auto exists_request = std::make_shared<ZooKeeperExistsRequest>();
exists_request->path = path;
storage.preprocessRequest(exists_request, 1, 0, new_zxid);
auto responses = storage.processRequest(exists_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
return responses[0].response->error == Coordination::Error::ZOK;
};
{
SCOPED_TRACE("Single Remove Single Node");
create("/T1", zkutil::CreateMode::Persistent);
auto responses = remove("/T1");
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_FALSE(exists("/T1"));
}
{
SCOPED_TRACE("Single Remove Tree");
create("/T2", zkutil::CreateMode::Persistent);
create("/T2/A", zkutil::CreateMode::Persistent);
auto responses = remove("/T2");
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZNOTEMPTY);
ASSERT_TRUE(exists("/T2"));
}
{
SCOPED_TRACE("Recursive Remove Single Node");
create("/T3", zkutil::CreateMode::Persistent);
auto responses = remove_recursive("/T3", 100);
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_FALSE(exists("/T3"));
}
{
SCOPED_TRACE("Recursive Remove Tree Small Limit");
create("/T5", zkutil::CreateMode::Persistent);
create("/T5/A", zkutil::CreateMode::Persistent);
create("/T5/B", zkutil::CreateMode::Persistent);
create("/T5/A/C", zkutil::CreateMode::Persistent);
auto responses = remove_recursive("/T5", 2);
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZNOTEMPTY);
ASSERT_TRUE(exists("/T5"));
ASSERT_TRUE(exists("/T5/A"));
ASSERT_TRUE(exists("/T5/B"));
ASSERT_TRUE(exists("/T5/A/C"));
}
{
SCOPED_TRACE("Recursive Remove Tree Big Limit");
create("/T6", zkutil::CreateMode::Persistent);
create("/T6/A", zkutil::CreateMode::Persistent);
create("/T6/B", zkutil::CreateMode::Persistent);
create("/T6/A/C", zkutil::CreateMode::Persistent);
auto responses = remove_recursive("/T6", 4);
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_FALSE(exists("/T6"));
ASSERT_FALSE(exists("/T6/A"));
ASSERT_FALSE(exists("/T6/B"));
ASSERT_FALSE(exists("/T6/A/C"));
}
{
SCOPED_TRACE("Recursive Remove Ephemeral");
create("/T7", zkutil::CreateMode::Ephemeral);
ASSERT_EQ(storage.ephemerals.size(), 1);
auto responses = remove_recursive("/T7", 100);
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_EQ(storage.ephemerals.size(), 0);
ASSERT_FALSE(exists("/T7"));
}
{
SCOPED_TRACE("Recursive Remove Tree With Ephemeral");
create("/T8", zkutil::CreateMode::Persistent);
create("/T8/A", zkutil::CreateMode::Persistent);
create("/T8/B", zkutil::CreateMode::Ephemeral);
create("/T8/A/C", zkutil::CreateMode::Ephemeral);
ASSERT_EQ(storage.ephemerals.size(), 1);
auto responses = remove_recursive("/T8", 4);
ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_EQ(storage.ephemerals.size(), 0);
ASSERT_FALSE(exists("/T8"));
ASSERT_FALSE(exists("/T8/A"));
ASSERT_FALSE(exists("/T8/B"));
ASSERT_FALSE(exists("/T8/A/C"));
}
}
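// A standalone sketch of the remove_nodes_limit accounting done by
// checkLimits above: nodes_observed starts at 1 (the root) and each visited
// node adds its child count; collection aborts once the limit is exceeded.
// For the "Small Limit" tree (/T5 with children A, B and grandchild A/C),
// a limit of 2 is exceeded as soon as the root's two children are counted,
// which is why that request returns ZNOTEMPTY.
#include <cstdint>
#include <iostream>

int main()
{
    const uint32_t limit = 2;
    uint32_t nodes_observed = 1; // the root /T5

    auto check_limits = [&](uint32_t num_children)
    {
        nodes_observed += num_children;
        return nodes_observed > limit; // true means LimitExceeded
    };

    std::cout << std::boolalpha
              << check_limits(2) << '\n'; // visiting /T5 (children A and B): true
}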
TYPED_TEST(CoordinationTest, TestRemoveRecursiveInMultiRequest)
{
using namespace DB;
using namespace Coordination;
using Storage = typename TestFixture::Storage;
ChangelogDirTest rocks("./rocksdb");
this->setRocksDBDirectory("./rocksdb");
Storage storage{500, "", this->keeper_context};
int zxid = 0;
auto prepare_create_tree = []()
{
return Coordination::Requests{
zkutil::makeCreateRequest("/A", "A", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A/B", "B", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A/C", "C", zkutil::CreateMode::Ephemeral),
zkutil::makeCreateRequest("/A/B/D", "D", zkutil::CreateMode::Ephemeral),
};
};
const auto exists = [&](const String & path)
{
int new_zxid = ++zxid;
const auto exists_request = std::make_shared<ZooKeeperExistsRequest>();
exists_request->path = path;
storage.preprocessRequest(exists_request, 1, 0, new_zxid);
auto responses = storage.processRequest(exists_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
return responses[0].response->error == Coordination::Error::ZOK;
};
const auto is_multi_ok = [&](Coordination::ZooKeeperResponsePtr response)
{
const auto & multi_response = dynamic_cast<Coordination::ZooKeeperMultiResponse &>(*response);
for (const auto & op_response : multi_response.responses)
if (op_response->error != Coordination::Error::ZOK)
return false;
return true;
};
{
SCOPED_TRACE("Remove In Multi Tx");
int new_zxid = ++zxid;
auto ops = prepare_create_tree();
ops.push_back(zkutil::makeRemoveRequest("/A", -1));
const auto request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(request, 1, 0, new_zxid);
auto responses = storage.processRequest(request, 1, new_zxid);
ops.pop_back();
ASSERT_EQ(responses.size(), 1);
ASSERT_FALSE(is_multi_ok(responses[0].response));
}
{
SCOPED_TRACE("Recursive Remove In Multi Tx");
int new_zxid = ++zxid;
auto ops = prepare_create_tree();
ops.push_back(zkutil::makeRemoveRecursiveRequest("/A", 4));
const auto request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(request, 1, 0, new_zxid);
auto responses = storage.processRequest(request, 1, new_zxid);
ops.pop_back();
ASSERT_EQ(responses.size(), 1);
ASSERT_TRUE(is_multi_ok(responses[0].response));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/C"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D"));
}
{
SCOPED_TRACE("Recursive Remove With Regular In Multi Tx");
int new_zxid = ++zxid;
auto ops = prepare_create_tree();
ops.push_back(zkutil::makeRemoveRequest("/A/C", -1));
ops.push_back(zkutil::makeRemoveRecursiveRequest("/A", 3));
const auto request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(request, 1, 0, new_zxid);
auto responses = storage.processRequest(request, 1, new_zxid);
ops.pop_back();
ops.pop_back();
ASSERT_EQ(responses.size(), 1);
ASSERT_TRUE(is_multi_ok(responses[0].response));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/C"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D"));
}
{
SCOPED_TRACE("Recursive Remove From Committed and Uncommitted states");
int create_zxid = ++zxid;
auto ops = prepare_create_tree();
/// First create nodes
const auto create_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(create_request, 1, 0, create_zxid);
auto create_responses = storage.processRequest(create_request, 1, create_zxid);
ASSERT_EQ(create_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(create_responses[0].response));
ASSERT_TRUE(exists("/A"));
ASSERT_TRUE(exists("/A/C"));
ASSERT_TRUE(exists("/A/B"));
ASSERT_TRUE(exists("/A/B/D"));
/// Remove node A/C as a single remove request.
/// Remove all others with a remove-recursive request.
/// In this case we should list the storage to understand the tree topology,
/// but ignore nodes already deleted in the uncommitted state.
int remove_zxid = ++zxid;
ops = {
zkutil::makeRemoveRequest("/A/C", -1),
zkutil::makeRemoveRecursiveRequest("/A", 3),
};
const auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
auto remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/C"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D"));
}
}
TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)
{
using namespace DB;
using namespace Coordination;
using Storage = typename TestFixture::Storage;
ChangelogDirTest rocks("./rocksdb");
this->setRocksDBDirectory("./rocksdb");
Storage storage{500, "", this->keeper_context};
int zxid = 0;
const auto create = [&](const String & path, int create_mode)
{
int new_zxid = ++zxid;
const auto create_request = std::make_shared<ZooKeeperCreateRequest>();
create_request->path = path;
create_request->is_ephemeral = create_mode == zkutil::CreateMode::Ephemeral || create_mode == zkutil::CreateMode::EphemeralSequential;
create_request->is_sequential = create_mode == zkutil::CreateMode::PersistentSequential || create_mode == zkutil::CreateMode::EphemeralSequential;
storage.preprocessRequest(create_request, 1, 0, new_zxid);
auto responses = storage.processRequest(create_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK) << "Failed to create " << path;
};
const auto add_watch = [&](const String & path)
{
int new_zxid = ++zxid;
const auto exists_request = std::make_shared<ZooKeeperExistsRequest>();
exists_request->path = path;
exists_request->has_watch = true;
storage.preprocessRequest(exists_request, 1, 0, new_zxid);
auto responses = storage.processRequest(exists_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK);
};
const auto add_list_watch = [&](const String & path)
{
int new_zxid = ++zxid;
const auto list_request = std::make_shared<ZooKeeperListRequest>();
list_request->path = path;
list_request->has_watch = true;
storage.preprocessRequest(list_request, 1, 0, new_zxid);
auto responses = storage.processRequest(list_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK);
};
create("/A", zkutil::CreateMode::Persistent);
create("/A/B", zkutil::CreateMode::Persistent);
create("/A/C", zkutil::CreateMode::Ephemeral);
create("/A/B/D", zkutil::CreateMode::Ephemeral);
add_watch("/A");
add_watch("/A/B");
add_watch("/A/C");
add_watch("/A/B/D");
add_list_watch("/A");
add_list_watch("/A/B");
ASSERT_EQ(storage.watches.size(), 4);
ASSERT_EQ(storage.list_watches.size(), 2);
int new_zxid = ++zxid;
auto remove_request = std::make_shared<ZooKeeperRemoveRecursiveRequest>();
remove_request->path = "/A";
remove_request->remove_nodes_limit = 4;
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
auto responses = storage.processRequest(remove_request, 1, new_zxid);
/// 7 responses: 1 for the remove itself, 4 for the exists watches and 2 for the list watches.
ASSERT_EQ(responses.size(), 7);
for (size_t i = 0; i < 7; ++i)
{
ASSERT_EQ(responses[i].response->error, Coordination::Error::ZOK);
if (const auto * watch_response = dynamic_cast<Coordination::ZooKeeperWatchResponse *>(responses[i].response.get()))
ASSERT_EQ(watch_response->type, Coordination::Event::DELETED);
}
ASSERT_EQ(storage.watches.size(), 0);
ASSERT_EQ(storage.list_watches.size(), 0);
}
TYPED_TEST(CoordinationTest, TestRemoveRecursiveAcls)
{
using namespace DB;
using namespace Coordination;
using Storage = typename TestFixture::Storage;
ChangelogDirTest rocks("./rocksdb");
this->setRocksDBDirectory("./rocksdb");
Storage storage{500, "", this->keeper_context};
int zxid = 0;
{
int new_zxid = ++zxid;
String user_auth_data = "test_user:test_password";
const auto auth_request = std::make_shared<ZooKeeperAuthRequest>();
auth_request->scheme = "digest";
auth_request->data = user_auth_data;
storage.preprocessRequest(auth_request, 1, 0, new_zxid);
auto responses = storage.processRequest(auth_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK) << "Failed to add auth to session";
}
const auto create = [&](const String & path)
{
int new_zxid = ++zxid;
const auto create_request = std::make_shared<ZooKeeperCreateRequest>();
create_request->path = path;
create_request->acls = {{.permissions = ACL::Create, .scheme = "auth", .id = ""}};
storage.preprocessRequest(create_request, 1, 0, new_zxid);
auto responses = storage.processRequest(create_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK) << "Failed to create " << path;
};
/// Add nodes with only Create ACL
create("/A");
create("/A/B");
create("/A/C");
create("/A/B/D");
{
int new_zxid = ++zxid;
auto remove_request = std::make_shared<ZooKeeperRemoveRecursiveRequest>();
remove_request->path = "/A";
remove_request->remove_nodes_limit = 4;
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
auto responses = storage.processRequest(remove_request, 1, new_zxid);
EXPECT_EQ(responses.size(), 1);
EXPECT_EQ(responses[0].response->error, Coordination::Error::ZNOAUTH);
}
}
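// A standalone sketch of the authorization rule visible in
// ToDeleteTreeCollector::collect above: every visited node that still has
// children must pass a Delete ACL check (removing a child requires Delete
// permission on its parent), so a tree created with Create-only ACLs fails
// with ZNOAUTH, as the test above expects.
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
    // Hypothetical subtree created with Create-only ACLs.
    const bool has_delete_acl = false;
    const std::map<std::string, std::vector<std::string>> tree = {
        {"/A", {"B", "C"}}, {"/A/B", {"D"}}, {"/A/B/D", {}}, {"/A/C", {}}};

    for (const auto & [path, children] : tree)
    {
        if (!children.empty() && !has_delete_acl)
        {
            std::cout << "ZNOAUTH at " << path << '\n'; // collection aborts
            return 0;
        }
    }
    std::cout << "OK\n";
}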
/// INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite,
/// CoordinationTest,
/// ::testing::ValuesIn(std::initializer_list<CompressionParam>{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}}));

View File

@ -20,6 +20,9 @@ static constexpr auto DBMS_DEFAULT_POLL_INTERVAL = 10;
/// The size of the I/O buffer by default.
static constexpr auto DBMS_DEFAULT_BUFFER_SIZE = 1048576ULL;
/// The initial size of adaptive I/O buffer by default.
static constexpr auto DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE = 16384ULL;
static constexpr auto PADDING_FOR_SIMD = 64;
/** Which blocks by default read the data (by number of rows).
@ -40,7 +43,7 @@ static constexpr auto SHOW_CHARS_ON_SYNTAX_ERROR = ptrdiff_t(160);
/// each period reduces the error counter by 2 times
/// too short a period can cause errors to disappear immediately after creation.
static constexpr auto DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD = 60;
/// replica error max cap, this is to prevent replica from accumulating too many errors and taking to long to recover.
/// replica error max cap, this is to prevent replica from accumulating too many errors and taking too long to recover.
static constexpr auto DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT = 1000;
/// The boundary on which the blocks for asynchronous file operations should be aligned.

View File

@ -148,6 +148,7 @@ namespace DB
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
M(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \
M(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \
M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \
M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \
M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \
@ -169,6 +170,7 @@ namespace DB
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
M(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \
M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0)
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -923,6 +923,9 @@ class IColumn;
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Int32, join_to_sort_minimum_perkey_rows, 40, "The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys", 0) \
M(Int32, join_to_sort_maximum_table_rows, 10000, "The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join.", 0) \
M(Bool, allow_experimental_join_right_table_sorting, false, "If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join.", 0) \
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\
\
@ -945,6 +948,7 @@ class IColumn;
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as '<archive> :: <file>' if archive has correct extension", 0) \
M(Bool, parallel_replicas_local_plan, false, "Build local plan for local replica", 0) \
\
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
@ -1140,6 +1144,7 @@ class IColumn;
M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \
M(Bool, type_json_skip_duplicated_paths, false, "When enabled, during parsing JSON object into JSON type duplicated paths will be ignored and only the first one will be inserted instead of an exception", 0) \
M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \
M(Bool, input_format_json_empty_as_default, false, "Treat empty fields in JSON input as default values.", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \

View File

@ -71,6 +71,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.9",
{
{"input_format_json_empty_as_default", false, false, "Added new setting to allow to treat empty fields in JSON input as default values."},
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"create_if_not_exists", false, false, "New setting."},
@ -79,11 +80,15 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"},
{"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"},
{"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},
{"join_to_sort_minimum_perkey_rows", 0, 40, "The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys"},
{"join_to_sort_maximum_table_rows", 0, 10000, "The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join"},
{"allow_experimental_join_right_table_sorting", false, false, "If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join"}
}
},
{"24.8",
{
{"rows_before_aggregation", true, true, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
{"rows_before_aggregation", false, false, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
{"restore_replace_external_table_functions_to_null", false, false, "New setting."},
{"restore_replace_external_engines_to_null", false, false, "New setting."},
{"input_format_json_max_depth", 1000000, 1000, "It was unlimited in previous versions, but that was unsafe."},
@ -99,7 +104,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"use_json_alias_for_old_object_type", true, false, "Use JSON type alias to create new JSON type"},
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"},
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"}
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"},
}
},
{"24.7",

View File

@ -420,6 +420,21 @@ bool ISerialization::isEphemeralSubcolumn(const DB::ISerialization::SubstreamPat
return path[last_elem].type == Substream::VariantElementNullMap;
}
bool ISerialization::isDynamicSubcolumn(const DB::ISerialization::SubstreamPath & path, size_t prefix_len)
{
if (prefix_len == 0 || prefix_len > path.size())
return false;
for (size_t i = 0; i != prefix_len; ++i)
{
if (path[i].type == SubstreamType::DynamicData || path[i].type == SubstreamType::DynamicStructure
|| path[i].type == SubstreamType::ObjectData || path[i].type == SubstreamType::ObjectStructure)
return true;
}
return false;
}
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
{
assert(prefix_len <= path.size());

View File

@ -457,6 +457,9 @@ public:
/// for writing/reading data. For example, it's a null-map subcolumn of Variant type (it's always constructed from discriminators).
static bool isEphemeralSubcolumn(const SubstreamPath & path, size_t prefix_len);
/// Returns true if stream with specified path corresponds to dynamic subcolumn.
static bool isDynamicSubcolumn(const SubstreamPath & path, size_t prefix_len);
protected:
template <typename State, typename StatePtr>
State * checkAndGetState(const StatePtr & state) const;

View File

@ -11,6 +11,7 @@
#include <IO/WriteBufferFromString.h>
#include <Formats/FormatSettings.h>
#include <Formats/JSONUtils.h>
namespace DB
{
@ -615,28 +616,49 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t
}
void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
template <typename ReturnType>
ReturnType SerializationArray::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
auto deserialize_nested = [&settings, this](IColumn & nested_column, ReadBuffer & buf) -> ReturnType
{
if constexpr (std::is_same_v<ReturnType, void>)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested);
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested);
else
nested->deserializeTextJSON(nested_column, istr, settings);
nested->deserializeTextJSON(nested_column, buf, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested);
return nested->tryDeserializeTextJSON(nested_column, buf, settings);
}
};
if (settings.json.empty_as_default)
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType
{
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(nested_column, istr, deserialize_nested);
}, false);
else
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType
{
return deserialize_nested(nested_column, istr);
}, false);
}
void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextJSONImpl<void>(column, istr, settings);
}
bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
auto read_nested = [&](IColumn & nested_column)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested);
return nested->tryDeserializeTextJSON(nested_column, istr, settings);
};
return deserializeTextImpl<bool>(column, istr, std::move(read_nested), false);
return deserializeTextJSONImpl<bool>(column, istr, settings);
}

View File

@ -82,6 +82,10 @@ public:
SerializationPtr create(const SerializationPtr & prev) const override;
ColumnPtr create(const ColumnPtr & prev) const override;
};
private:
template <typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
};
}

View File

@ -6,6 +6,7 @@
#include <Columns/ColumnMap.h>
#include <Core/Field.h>
#include <Formats/FormatSettings.h>
#include <Formats/JSONUtils.h>
#include <Common/assert_cast.h>
#include <Common/quoteString.h>
#include <IO/WriteHelpers.h>
@ -316,28 +317,52 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro
}
void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
template <typename ReturnType>
ReturnType SerializationMap::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
auto deserialize_nested = [&settings](IColumn & subcolumn, ReadBuffer & buf, const SerializationPtr & subcolumn_serialization) -> ReturnType
{
if constexpr (std::is_same_v<ReturnType, void>)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
});
}
bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
auto reader = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings);
}
};
return deserializeTextImpl<bool>(column, istr, reader);
if (settings.json.empty_as_default)
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType
{
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(subcolumn, buf,
[&deserialize_nested, &subcolumn_serialization](IColumn & subcolumn_, ReadBuffer & buf_) -> ReturnType
{
return deserialize_nested(subcolumn_, buf_, subcolumn_serialization);
});
});
else
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType
{
return deserialize_nested(subcolumn, buf, subcolumn_serialization);
});
}
void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextJSONImpl<void>(column, istr, settings);
}
bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return deserializeTextJSONImpl<bool>(column, istr, settings);
}
void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -74,6 +74,9 @@ private:
template <typename ReturnType = void, typename Reader>
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
template <typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
};
}

View File

@ -5,6 +5,7 @@
#include <Core/Field.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Formats/JSONUtils.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
@ -313,27 +314,9 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t
}
template <typename ReturnType>
ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
ReturnType SerializationTuple::deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
auto deserialize_element = [&](IColumn & element_column, size_t element_pos)
{
if constexpr (throw_exception)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]);
else
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
return true;
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]);
return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings);
}
};
static constexpr auto throw_exception = std::is_same_v<ReturnType, void>;
if (settings.json.read_named_tuples_as_objects
&& have_explicit_names)
@ -506,12 +489,51 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf
}
}
void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
template <typename ReturnType>
ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextJSONImpl(column, istr, settings);
auto deserialize_nested = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType
{
if constexpr (std::is_same_v<ReturnType, void>)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization);
else
nested_column_serialization->deserializeTextJSON(nested_column, buf, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization);
else
return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings);
}
};
if (settings.json.empty_as_default)
return deserializeTupleJSONImpl<ReturnType>(column, istr, settings,
[&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType
{
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(nested_column, istr,
[&deserialize_nested, element_pos, this](IColumn & nested_column_, ReadBuffer & buf) -> ReturnType
{
return deserialize_nested(nested_column_, buf, elems[element_pos]);
});
});
else
return deserializeTupleJSONImpl<ReturnType>(column, istr, settings,
[&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType
{
return deserialize_nested(nested_column, istr, elems[element_pos]);
});
}
bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextJSONImpl<void>(column, istr, settings);
}
bool SerializationTuple::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return deserializeTextJSONImpl<bool>(column, istr, settings);
}

View File

@ -81,7 +81,10 @@ private:
template <typename ReturnType = void>
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const;
template <typename ReturnType = void>
template <typename ReturnType>
ReturnType deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const;
template <typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
template <typename ReturnType = void>

View File

@ -197,8 +197,9 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases of different engines is not supported");
}
if (exchange && !supportsAtomicRename())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported");
std::string message;
if (exchange && !supportsAtomicRename(&message))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported because exchanging files is not supported by the OS ({})", message);
waitDatabaseStarted();

View File

@ -63,6 +63,7 @@ namespace ErrorCodes
extern const int NO_ACTIVE_REPLICAS;
extern const int CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT;
extern const int CANNOT_RESTORE_TABLE;
extern const int SUPPORT_IS_DISABLED;
}
static constexpr const char * REPLICATED_DATABASE_MARK = "DatabaseReplicated";
@ -1741,6 +1742,9 @@ void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const
{
waitDatabaseStarted();
if (!local_context->getServerSettings().database_replicated_allow_detach_permanently)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Support for DETACH TABLE PERMANENTLY is disabled");
auto txn = local_context->getZooKeeperMetadataTransaction();
assert(!ddl_worker->isCurrentlyActive() || txn);
if (txn && txn->isInitialQuery())

View File

@ -339,7 +339,15 @@ DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const
{
int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
return std::make_unique<WriteBufferFromFile>(
fs::path(disk_path) / path, buf_size, flags, settings.local_throttler);
fs::path(disk_path) / path,
buf_size,
flags,
settings.local_throttler,
0666,
nullptr,
0,
settings.use_adaptive_write_buffer,
settings.adaptive_write_buffer_initial_size);
}
std::vector<String> DiskLocal::getBlobPath(const String & path) const

View File

@ -59,7 +59,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage(
const WriteSettings & write_settings_,
std::shared_ptr<const AzureBlobStorage::RequestSettings> settings_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_)
: WriteBufferFromFileBase(buf_size_, nullptr, 0)
: WriteBufferFromFileBase(std::min(buf_size_, static_cast<size_t>(DBMS_DEFAULT_BUFFER_SIZE)), nullptr, 0)
, log(getLogger("WriteBufferFromAzureBlobStorage"))
, buffer_allocation_policy(createBufferAllocationPolicy(*settings_))
, max_single_part_upload_size(settings_->max_single_part_upload_size)
@ -244,11 +244,21 @@ void WriteBufferFromAzureBlobStorage::allocateBuffer()
buffer_allocation_policy->nextBuffer();
chassert(0 == hidden_size);
auto size = buffer_allocation_policy->getBufferSize();
/// The first buffer was already allocated in the BufferWithOwnMemory constructor with the buffer size provided to the constructor.
/// It will be reallocated in subsequent nextImpl calls up to the buffer size requested by buffer_allocation_policy.
if (buffer_allocation_policy->getBufferNumber() == 1)
size = std::min(size_t(DBMS_DEFAULT_BUFFER_SIZE), size);
{
/// Reduce the memory size if the initial size was larger than the size requested by buffer_allocation_policy.
/// This usually doesn't happen, but it does in unit tests.
if (memory.size() > buffer_allocation_policy->getBufferSize())
{
memory.resize(buffer_allocation_policy->getBufferSize());
WriteBuffer::set(memory.data(), memory.size());
}
return;
}
auto size = buffer_allocation_policy->getBufferSize();
memory = Memory(size);
WriteBuffer::set(memory.data(), memory.size());
}
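// A standalone sketch (not the actual BufferAllocationPolicy) of the adaptive
// growth idea behind the change above: the first buffer starts small (see
// DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE = 16384 added earlier) and later
// allocations grow towards the regular DBMS_DEFAULT_BUFFER_SIZE, so short
// writes never pay for a full-size buffer.
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    constexpr size_t initial = 16384;  // DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE
    constexpr size_t target = 1048576; // DBMS_DEFAULT_BUFFER_SIZE

    for (size_t size = initial; ; size = std::min(size * 2, target))
    {
        std::cout << "allocate " << size << " bytes\n";
        if (size == target)
            break;
    }
}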

View File

@ -289,7 +289,7 @@ std::unique_ptr<WriteBufferFromFileBase> AzureObjectStorage::writeObject( /// NO
return std::make_unique<WriteBufferFromAzureBlobStorage>(
client.get(),
object.remote_path,
buf_size,
write_settings.use_adaptive_write_buffer ? write_settings.adaptive_write_buffer_initial_size : buf_size,
patchSettings(write_settings),
settings.get(),
std::move(scheduler));

View File

@ -282,7 +282,7 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
client.get(),
uri.bucket,
object.remote_path,
buf_size,
write_settings.use_adaptive_write_buffer ? write_settings.adaptive_write_buffer_initial_size : buf_size,
request_settings,
std::move(blob_storage_log),
attributes,

View File

@ -152,6 +152,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
format_settings.json.empty_as_default = settings.input_format_json_empty_as_default;
format_settings.json.type_json_skip_duplicated_paths = settings.type_json_skip_duplicated_paths;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields;

View File

@ -237,6 +237,7 @@ struct FormatSettings
bool infer_incomplete_types_as_strings = true;
bool throw_on_bad_escape_sequence = true;
bool ignore_unnecessary_fields = true;
bool empty_as_default = false;
bool type_json_skip_duplicated_paths = false;
} json{};

View File

@ -2,12 +2,14 @@
#include <Formats/JSONUtils.h>
#include <Formats/ReadSchemaUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferValidUTF8.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeObjectDeprecated.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/assert_cast.h>
#include <base/find_symbols.h>
@ -286,11 +288,19 @@ namespace JSONUtils
return true;
}
auto deserialize = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf) -> bool
{
if (as_nullable)
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization);
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf, format_settings, serialization);
serialization->deserializeTextJSON(column, in, format_settings);
serialization->deserializeTextJSON(column_, buf, format_settings);
return true;
};
if (format_settings.json.empty_as_default)
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<bool, false>(column, in, deserialize);
else
return deserialize(column, in);
}
catch (Exception & e)
{
@ -920,6 +930,78 @@ namespace JSONUtils
}
}
template <typename ReturnType, bool default_column_return_value>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize<ReturnType> & deserialize_nested)
{
static constexpr auto throw_exception = std::is_same_v<ReturnType, void>;
static constexpr auto EMPTY_STRING = "\"\"";
static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length();
if (istr.eof() || *istr.position() != EMPTY_STRING[0])
return deserialize_nested(column, istr);
auto do_deserialize = [](IColumn & column_, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType
{
if (check_for_empty_string(buf))
{
column_.insertDefault();
return ReturnType(default_column_return_value);
}
return deserialize(column_, buf);
};
if (istr.available() >= EMPTY_STRING_LENGTH)
{
/// We have enough data in the buffer to check if we have an empty string.
auto check_for_empty_string = [](ReadBuffer & buf) -> bool
{
auto * pos = buf.position();
if (checkString(EMPTY_STRING, buf))
return true;
buf.position() = pos;
return false;
};
return do_deserialize(column, istr, check_for_empty_string, deserialize_nested);
}
/// We don't have enough data in the buffer to check if we have an empty string.
/// Use PeekableReadBuffer to make a checkpoint before checking for an
/// empty string and roll back if the check failed.
auto check_for_empty_string = [](ReadBuffer & buf) -> bool
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf);
peekable_buf.setCheckpoint();
SCOPE_EXIT(peekable_buf.dropCheckpoint());
if (checkString(EMPTY_STRING, peekable_buf))
return true;
peekable_buf.rollbackToCheckpoint();
return false;
};
auto deserialize_nested_with_check = [&deserialize_nested](IColumn & column_, ReadBuffer & buf) -> ReturnType
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf);
if constexpr (throw_exception)
deserialize_nested(column_, peekable_buf);
else if (!deserialize_nested(column_, peekable_buf))
return ReturnType(false);
if (unlikely(peekable_buf.hasUnreadData()))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available()));
return ReturnType(true);
};
PeekableReadBuffer peekable_buf(istr, true);
return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_nested_with_check);
}
template void deserializeEmpyStringAsDefaultOrNested<void, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<void> & deserialize_nested);
template bool deserializeEmpyStringAsDefaultOrNested<bool, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
template bool deserializeEmpyStringAsDefaultOrNested<bool, false>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
}
}
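
The dispatch above is easier to see outside the buffer machinery. A minimal standalone sketch of the same idea, with toy types instead of IColumn/ReadBuffer (names and signatures are ours, not ClickHouse's): if the next token is exactly "", insert a default value; otherwise hand the input to the nested deserializer untouched. The real implementation additionally covers the case where the two quote characters straddle the end of the internal buffer, which is what the PeekableReadBuffer checkpoint/rollback path is for.

    #include <functional>
    #include <string>

    // Toy model of deserializeEmpyStringAsDefaultOrNested.
    bool deserializeEmptyAsDefault(const std::string & in, size_t & pos,
                                   const std::function<bool(size_t &)> & nested,
                                   const std::function<void()> & insert_default)
    {
        if (pos + 2 <= in.size() && in[pos] == '"' && in[pos + 1] == '"')
        {
            insert_default();   // "" -> column default
            pos += 2;
            return true;
        }
        return nested(pos);     // anything else -> normal JSON parsing
    }

    int main()
    {
        std::string input = "\"\"";
        size_t pos = 0;
        int defaults = 0;
        deserializeEmptyAsDefault(input, pos,
                                  [](size_t &) { return true; },  // stand-in nested parser
                                  [&] { ++defaults; });
        return defaults == 1 ? 0 : 1;
    }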


@ -8,6 +8,7 @@
#include <IO/Progress.h>
#include <Core/NamesAndTypes.h>
#include <Common/Stopwatch.h>
#include <functional>
#include <utility>
namespace DB
@ -146,6 +147,16 @@ namespace JSONUtils
bool skipUntilFieldInObject(ReadBuffer & in, const String & desired_field_name, const FormatSettings::JSON & settings);
void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings);
template <typename ReturnType>
using NestedDeserialize = std::function<ReturnType(IColumn &, ReadBuffer &)>;
template <typename ReturnType, bool default_column_return_value = true>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize<ReturnType> & deserialize_nested);
extern template void deserializeEmpyStringAsDefaultOrNested<void, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<void> & deserialize_nested);
extern template bool deserializeEmpyStringAsDefaultOrNested<bool, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
extern template bool deserializeEmpyStringAsDefaultOrNested<bool, false>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
}
}
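
The extern template declarations complement the explicit instantiations in the .cpp file above: translation units that include this header link against the three instantiations compiled once in JSONUtils.cpp instead of instantiating their own copies. The same pattern in miniature (hypothetical file and function names):

    // twice.h
    template <typename T> T twice(T x) { return x + x; }
    extern template int twice<int>(int);  // promise: instantiated in exactly one TU

    // twice.cpp
    // #include "twice.h"
    // template int twice<int>(int);      // the shared definition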


@ -2,7 +2,7 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Columns/ColumnLowCardinality.h>
- #include <DataTypes/DataTypeLowCardinality.h>
+ #include <Columns/ColumnSparse.h>
namespace DB
{
@ -18,11 +18,6 @@ public:
return std::make_shared<FunctionMaterialize>();
}
- bool useDefaultImplementationForNulls() const override
- {
- return false;
- }
/// Get the function name.
String getName() const override
{
@ -34,8 +29,16 @@ public:
return true;
}
+ bool useDefaultImplementationForNulls() const override { return false; }
+ bool useDefaultImplementationForNothing() const override { return false; }
+ bool useDefaultImplementationForConstants() const override { return false; }
+ bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
+ bool useDefaultImplementationForSparseColumns() const override { return false; }
bool isSuitableForConstantFolding() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
@ -52,7 +55,7 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
- return arguments[0].column->convertToFullColumnIfConst();
+ return recursiveRemoveSparse(arguments[0].column->convertToFullColumnIfConst());
}
bool hasInformationAboutMonotonicity() const override { return true; }
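
Previously materialize() expanded only Const columns, so a sparse input flowed through still sparse even though callers expect a fully materialized result. A toy model of what "removing sparseness" means (our simplified struct, not ClickHouse's ColumnSparse):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Sparse representation: a default value plus (row, value) exceptions.
    struct SparseCol
    {
        int def = 0;
        std::vector<std::pair<size_t, int>> exceptions;
        size_t rows = 0;
    };

    // materialize() analogue: expand to one value per row so callers can
    // index positionally without knowing about the representation.
    std::vector<int> removeSparse(const SparseCol & col)
    {
        std::vector<int> full(col.rows, col.def);
        for (const auto & [row, value] : col.exceptions)
            full[row] = value;
        return full;
    }

    int main()
    {
        SparseCol c{0, {{2, 7}}, 5};
        return removeSparse(c)[2] == 7 ? 0 : 1;  // expands to {0, 0, 7, 0, 0}
    }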


@ -124,7 +124,7 @@ public:
std::string_view sqid = col_non_const->getDataAt(i).toView();
std::vector<UInt64> integers = sqids.decode(String(sqid));
res_nested_data.insert(integers.begin(), integers.end());
- res_offsets_data.push_back(integers.size());
+ res_offsets_data.push_back(res_offsets_data.back() + integers.size());
}
}
else
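
The replaced line was the bug: offsets in a ClickHouse array column are cumulative end positions, one per row, not per-row lengths. For rows decoding to 2, 3, and 1 integers the offsets column must read 2, 5, 6; pushing the raw sizes (2, 3, 1) makes every row after the first point at the wrong slice of the nested data. In miniature (an explicit 0 sentinel stands in for the padded offsets array, where back() on an empty column reads as 0):

    #include <cstddef>
    #include <vector>

    int main()
    {
        std::vector<std::vector<int>> rows = {{1, 2}, {3, 4, 5}, {6}};
        std::vector<int> nested;
        std::vector<size_t> offsets = {0};  // sentinel so back() works on the first row
        for (const auto & row : rows)
        {
            nested.insert(nested.end(), row.begin(), row.end());
            offsets.push_back(offsets.back() + row.size());  // 2, 5, 6 - not 2, 3, 1
        }
        return offsets.back() == nested.size() ? 0 : 1;  // invariant: last offset == total size
    }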


@ -36,7 +36,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
/// Case when bucket name represented in domain name of S3 URL.
/// E.g. (https://bucket-name.s3.region.amazonaws.com/key)
/// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access
- static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))");
+ static const RE2 virtual_hosted_style_pattern(R"(([^.]+)\.(s3express[\-a-z0-9]+|s3|cos|obs|.*oss[^\/]*|eos)([.\-][a-z0-9\-.:]+))");
/// Case when AWS Private Link Interface is being used
/// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key)
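
Two effects of the tightened pattern: ([^.]+) keeps the bucket capture from greedily swallowing dots (with (.+), a host like a.b.s3.example.com could yield the bucket "a.b"), and the .*oss[^\/]* alternative accepts OSS-style endpoints whose service label is not exactly "oss". A quick self-check against the new pattern (host names are illustrative):

    #include <re2/re2.h>
    #include <string>

    int main()
    {
        static const RE2 pattern(R"(([^.]+)\.(s3express[\-a-z0-9]+|s3|cos|obs|.*oss[^\/]*|eos)([.\-][a-z0-9\-.:]+))");
        std::string bucket, service, rest;
        bool ok1 = RE2::FullMatch("mybucket.s3.us-east-1.amazonaws.com", pattern, &bucket, &service, &rest)
            && bucket == "mybucket" && service == "s3";
        bool ok2 = RE2::FullMatch("data.my-oss-cn.example.com", pattern, &bucket, &service, &rest)
            && bucket == "data";  // bucket capture cannot cross the first dot
        return ok1 && ok2 ? 0 : 1;
    }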


@ -32,8 +32,10 @@ WriteBufferFromFile::WriteBufferFromFile(
ThrottlerPtr throttler_,
mode_t mode,
char * existing_memory,
- size_t alignment)
- : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, throttler_, alignment, file_name_)
+ size_t alignment,
+ bool use_adaptive_buffer_size_,
+ size_t adaptive_buffer_initial_size)
+ : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, throttler_, alignment, file_name_, use_adaptive_buffer_size_, adaptive_buffer_initial_size)
{
ProfileEvents::increment(ProfileEvents::FileOpen);
@ -66,8 +68,10 @@ WriteBufferFromFile::WriteBufferFromFile(
size_t buf_size,
ThrottlerPtr throttler_,
char * existing_memory,
- size_t alignment)
- : WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, throttler_, alignment, original_file_name)
+ size_t alignment,
+ bool use_adaptive_buffer_size_,
+ size_t adaptive_buffer_initial_size)
+ : WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, throttler_, alignment, original_file_name, use_adaptive_buffer_size_, adaptive_buffer_initial_size)
{
fd_ = -1;
}


@ -36,7 +36,9 @@ public:
ThrottlerPtr throttler_ = {},
mode_t mode = 0666,
char * existing_memory = nullptr,
- size_t alignment = 0);
+ size_t alignment = 0,
+ bool use_adaptive_buffer_size_ = false,
+ size_t adaptive_buffer_initial_size = DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE);
/// Use pre-opened file descriptor.
explicit WriteBufferFromFile(
@ -45,7 +47,9 @@ public:
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
ThrottlerPtr throttler_ = {},
char * existing_memory = nullptr,
- size_t alignment = 0);
+ size_t alignment = 0,
+ bool use_adaptive_buffer_size_ = false,
+ size_t adaptive_buffer_initial_size = DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE);
~WriteBufferFromFile() override;
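
Both constructors gain the new parameters at the end with defaults, so every existing call site compiles unchanged and callers opt in explicitly. The same API evolution in miniature (toy function, assumed sizes):

    #include <cstddef>

    void open(size_t buf_size = 1024,
              size_t alignment = 0,
              bool use_adaptive_buffer_size = false,  // new, defaulted
              size_t adaptive_initial_size = 16384)   // new, defaulted
    {
        (void)buf_size; (void)alignment;
        (void)use_adaptive_buffer_size; (void)adaptive_initial_size;
    }

    int main()
    {
        open(4096, 512);        // pre-change call site, still valid
        open(4096, 512, true);  // new opt-in call site
    }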


@ -83,6 +83,13 @@ void WriteBufferFromFileDescriptor::nextImpl()
ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written);
/// Increase buffer size for next data if adaptive buffer size is used and nextImpl was called because of end of buffer.
if (!available() && use_adaptive_buffer_size && memory.size() < adaptive_max_buffer_size)
{
memory.resize(std::min(memory.size() * 2, adaptive_max_buffer_size));
BufferBase::set(memory.data(), memory.size(), 0);
}
}
/// NOTE: This class can be used as a very low-level building block, for example
@ -94,11 +101,15 @@ WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
char * existing_memory,
ThrottlerPtr throttler_,
size_t alignment,
- std::string file_name_)
- : WriteBufferFromFileBase(buf_size, existing_memory, alignment)
+ std::string file_name_,
+ bool use_adaptive_buffer_size_,
+ size_t adaptive_buffer_initial_size)
+ : WriteBufferFromFileBase(use_adaptive_buffer_size_ ? adaptive_buffer_initial_size : buf_size, existing_memory, alignment)
, fd(fd_)
, throttler(throttler_)
, file_name(std::move(file_name_))
+ , use_adaptive_buffer_size(use_adaptive_buffer_size_)
+ , adaptive_max_buffer_size(buf_size)
{
}
@ -124,6 +135,7 @@ void WriteBufferFromFileDescriptor::finalizeImpl()
return;
}
use_adaptive_buffer_size = false;
next();
}
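
Two details worth spelling out: growth is plain doubling capped at the buf_size the caller originally asked for (which becomes adaptive_max_buffer_size), and finalizeImpl() clears the flag so the final flush cannot trigger one last pointless resize. The resulting size progression, under assumed sizes of 16 KiB initial and 1 MiB requested:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    int main()
    {
        size_t size = 16 * 1024;              // stand-in for the adaptive initial size
        const size_t max_size = 1024 * 1024;  // stand-in for the requested buf_size
        while (size < max_size)
        {
            size = std::min(size * 2, max_size);  // one doubling per filled buffer
            std::cout << size << '\n';            // 32K 64K 128K 256K 512K 1M
        }
    }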


@ -18,7 +18,9 @@ public:
char * existing_memory = nullptr,
ThrottlerPtr throttler_ = {},
size_t alignment = 0,
- std::string file_name_ = "");
+ std::string file_name_ = "",
+ bool use_adaptive_buffer_size_ = false,
+ size_t adaptive_buffer_initial_size = DBMS_DEFAULT_INITIAL_ADAPTIVE_BUFFER_SIZE);
/** Could be used before initialization if needed 'fd' was not passed to constructor.
* It's not possible to change 'fd' during work.
@ -56,6 +58,12 @@ protected:
/// If file has name contains filename, otherwise contains string "(fd=...)"
std::string file_name;
/// If true, the size of the internal buffer will be exponentially increased up to
/// adaptive_max_buffer_size after each nextImpl call. It can be used to avoid
/// allocating a large buffer when the actual size of the written data is small.
bool use_adaptive_buffer_size;
size_t adaptive_max_buffer_size;
void finalizeImpl() override;
};


@ -95,7 +95,7 @@ WriteBufferFromS3::WriteBufferFromS3(
std::optional<std::map<String, String>> object_metadata_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
const WriteSettings & write_settings_)
- : WriteBufferFromFileBase(buf_size_, nullptr, 0)
+ : WriteBufferFromFileBase(std::min(buf_size_, static_cast<size_t>(DBMS_DEFAULT_BUFFER_SIZE)), nullptr, 0)
, bucket(bucket_)
, key(key_)
, request_settings(request_settings_)
@ -351,9 +351,17 @@ void WriteBufferFromS3::allocateBuffer()
buffer_allocation_policy->nextBuffer();
chassert(0 == hidden_size);
+ /// The first buffer was already allocated in the BufferWithOwnMemory constructor with the buffer size provided to the constructor.
+ /// It will be reallocated in subsequent nextImpl calls up to the desired buffer size from buffer_allocation_policy.
if (buffer_allocation_policy->getBufferNumber() == 1)
{
- allocateFirstBuffer();
+ /// Reduce the memory size if the initial size was larger than the desired size from buffer_allocation_policy.
+ /// Usually it doesn't happen, but we have it in unit tests.
+ if (memory.size() > buffer_allocation_policy->getBufferSize())
+ {
+ memory.resize(buffer_allocation_policy->getBufferSize());
+ WriteBuffer::set(memory.data(), memory.size());
+ }
return;
}
@ -361,14 +369,6 @@ void WriteBufferFromS3::allocateBuffer()
WriteBuffer::set(memory.data(), memory.size());
}
- void WriteBufferFromS3::allocateFirstBuffer()
- {
- const auto max_first_buffer = buffer_allocation_policy->getBufferSize();
- const auto size = std::min(size_t(DBMS_DEFAULT_BUFFER_SIZE), max_first_buffer);
- memory = Memory(size);
- WriteBuffer::set(memory.data(), memory.size());
- }
void WriteBufferFromS3::setFakeBufferWhenPreFinalized()
{
WriteBuffer::set(fake_buffer_when_prefinalized, sizeof(fake_buffer_when_prefinalized));
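
With allocateFirstBuffer() gone, the constructor itself allocates min(buf_size_, DBMS_DEFAULT_BUFFER_SIZE) and the first allocateBuffer() call only shrinks the buffer when the allocation policy wants less than that. A toy model of the reconciliation (sizes assumed):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    int main()
    {
        const size_t default_buffer_size = 1 << 20;  // stand-in for DBMS_DEFAULT_BUFFER_SIZE
        const size_t requested = 16 << 20;           // buf_size_ passed by the caller
        std::vector<char> memory(std::min(requested, default_buffer_size));  // constructor

        const size_t policy_size = 512 << 10;        // buffer_allocation_policy->getBufferSize()
        if (memory.size() > policy_size)
            memory.resize(policy_size);              // shrink-only, on the first buffer
        return memory.size() == policy_size ? 0 : 1;
    }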


@ -64,7 +64,6 @@ private:
void reallocateFirstBuffer();
void detachBuffer();
void allocateBuffer();
- void allocateFirstBuffer();
void setFakeBufferWhenPreFinalized();
S3::UploadPartRequest getUploadRequest(size_t part_number, PartData & data);


@ -54,7 +54,7 @@ inline void WriteBufferValidUTF8::putReplacement()
}
- inline void WriteBufferValidUTF8::putValid(char *data, size_t len)
+ inline void WriteBufferValidUTF8::putValid(const char *data, size_t len)
{
if (len == 0)
return;
@ -149,9 +149,34 @@ void WriteBufferValidUTF8::finalizeImpl()
/// Write all complete sequences from buffer.
nextImpl();
- /// If unfinished sequence at end, then write replacement.
+ /// Handle remaining bytes if we have an incomplete sequence
if (working_buffer.begin() != memory.data())
+ {
+ const char * p = memory.data();
+ while (p < pos)
+ {
+ UInt8 len = length_of_utf8_sequence[static_cast<const unsigned char>(*p)];
+ if (p + len > pos)
+ {
+ /// Incomplete sequence. Skip one byte.
+ putReplacement();
+ ++p;
+ }
+ else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<const unsigned char *>(p), len))
+ {
+ /// Valid sequence
+ putValid(p, len);
+ p += len;
+ }
+ else
+ {
+ /// Invalid sequence, skip first byte.
+ putReplacement();
+ ++p;
+ }
+ }
+ }
}
}
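
Per the removed comment, the old code wrote a single replacement for an unfinished trailing sequence; the new loop walks the tail byte by byte, emitting valid sequences and substituting U+FFFD for anything incomplete or illegal, one byte at a time. A toy version of the tail loop (simplified length table, no continuation-byte validation as Poco would do):

    #include <cstdio>
    #include <string>

    static int seqLength(unsigned char c)
    {
        if (c < 0x80) return 1;
        if ((c & 0xE0) == 0xC0) return 2;
        if ((c & 0xF0) == 0xE0) return 3;
        if ((c & 0xF8) == 0xF0) return 4;
        return 1;  // stray continuation byte: length 1, replaced below
    }

    int main()
    {
        const std::string in = "abc\xE2\x82";  // "abc" + truncated U+20AC
        std::string out;
        const char * p = in.data();
        const char * end = p + in.size();
        while (p < end)
        {
            int len = seqLength(static_cast<unsigned char>(*p));
            bool lone_continuation = (len == 1 && static_cast<unsigned char>(*p) >= 0x80);
            if (p + len > end || lone_continuation)
            {
                out += "\xEF\xBF\xBD";  // U+FFFD, then skip exactly one byte
                ++p;
            }
            else
            {
                out.append(p, len);
                p += len;
            }
        }
        std::printf("%s\n", out.c_str());  // "abc" followed by two replacement chars
    }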

Some files were not shown because too many files have changed in this diff.