Merge branch 'ClickHouse:master' into gcddelta-codec
commit ee0b2e4b39

.gitmodules: 2 changes (vendored)
@@ -3,7 +3,7 @@
 	url = https://github.com/facebook/zstd
 [submodule "contrib/lz4"]
 	path = contrib/lz4
-	url = https://github.com/lz4/lz4
+	url = https://github.com/ClickHouse/lz4
 [submodule "contrib/librdkafka"]
 	path = contrib/librdkafka
 	url = https://github.com/ClickHouse/librdkafka
contrib/arrow: 2 changes (vendored)

@@ -1 +1 @@
-Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f
+Subproject commit 1d93838f69a802639ca144ea5704a98e2481810d
contrib/boost: 2 changes (vendored)

@@ -1 +1 @@
-Subproject commit 063a9372b4ae304e869a5c5724971d0501552731
+Subproject commit 5a3fb87e67cc67ffadfc1990b3665fc3b260fcf4
contrib/lz4: 2 changes (vendored)

@@ -1 +1 @@
-Subproject commit e82198428c8061372d5adef1f9bfff4203f6081e
+Subproject commit 92ebf1870b9acbefc0e7970409a181954a10ff40
@@ -13,6 +13,11 @@ add_library (ch_contrib::lz4 ALIAS _lz4)
 
 target_compile_definitions (_lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1)
 target_compile_definitions (_lz4 PUBLIC LZ4_FAST_DEC_LOOP=1)
 
+if(ARCH_S390X)
+    target_compile_definitions(_lz4 PRIVATE LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT)
+endif()
+
 if (SANITIZE STREQUAL "undefined")
     target_compile_options (_lz4 PRIVATE -fno-sanitize=undefined)
 endif ()
contrib/openldap: 2 changes (vendored)

@@ -1 +1 @@
-Subproject commit 8688afe6bc95ebcd20edf4578c536362218cb70a
+Subproject commit 5671b80e369df2caf5f34e02924316205a43c895
@@ -96,71 +96,82 @@ target_compile_definitions(_lber
)

set(_ldap_srcs
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/abandon.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/avl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cancel.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/filter.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/free.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getdn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getentry.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getvalues.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/init.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/lbase64.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/msctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/psearchctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/rdwr.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/rq.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tavl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_debug.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_nt.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_posix.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_pth.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_thr.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/threads.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tpool.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c"
    "${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c"
)

mkversion(ldap)
@@ -185,43 +196,5 @@ target_compile_definitions(_ldap
     PRIVATE LDAP_LIBRARY
 )
 
-set(_ldap_r_specific_srcs
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/threads.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rdwr.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/tpool.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rq.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_posix.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_thr.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_nt.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_pth.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_stub.c"
-    "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_debug.c"
-)
-
-mkversion(ldap_r)
-
-add_library(_ldap_r
-    ${_ldap_r_specific_srcs}
-    ${_ldap_srcs}
-    "${CMAKE_CURRENT_BINARY_DIR}/ldap_r-version.c"
-)
-
-target_link_libraries(_ldap_r
-    PRIVATE _lber
-    PRIVATE OpenSSL::Crypto OpenSSL::SSL
-)
-
-target_include_directories(_ldap_r SYSTEM
-    PUBLIC ${_extra_build_dir}/include
-    PUBLIC "${OPENLDAP_SOURCE_DIR}/include"
-    PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r"
-    PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap"
-)
-
-target_compile_definitions(_ldap_r
-    PRIVATE LDAP_R_COMPILE
-    PRIVATE LDAP_LIBRARY
-)
-
-add_library(ch_contrib::ldap ALIAS _ldap_r)
+add_library(ch_contrib::ldap ALIAS _ldap)
 add_library(ch_contrib::lber ALIAS _lber)
@@ -20,6 +20,7 @@ echo '/boost/context/*' >> $FILES_TO_CHECKOUT
 echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
 echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
 echo '/boost/core/*' >> $FILES_TO_CHECKOUT
+echo '/boost/describe/*' >> $FILES_TO_CHECKOUT
 echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
 echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
 echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
@@ -82,4 +83,4 @@ echo '/libs/*' >> $FILES_TO_CHECKOUT
 
 git config core.sparsecheckout true
 git checkout $1
-git read-tree -mu HEAD
+git read-tree -mu HEAD
@@ -41,7 +41,7 @@ do
 
     echo "Creating destination table ${table}_${hash}" >&2
 
-    echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue
+    echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 --receive_timeout=10 --send_timeout=10 $CONNECTION_PARAMETERS || continue
 
     echo "Creating table system.${table}_sender" >&2
 
@@ -644,7 +644,7 @@ function report
 rm -r report ||:
 mkdir report report/tmp ||:
 
-rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||:
+rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv all-queries.tsv run-errors.tsv ||:
 
 cat analyze/errors.log >> report/errors.log ||:
 cat profile-errors.log >> report/errors.log ||:
@@ -810,12 +810,6 @@ create view total_client_time_per_query as select *
     from file('analyze/client-times.tsv', TSV,
         'test text, query_index int, client float, server float');
 
-create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv')
-    as select client, server, round(client/server, 3) p,
-        test, query_display_name
-    from total_client_time_per_query left join query_display_names using (test, query_index)
-    where p > round(1.02, 3) order by p desc;
-
 create table wall_clock_time_per_test engine Memory as select *
     from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
@@ -364,20 +364,6 @@ if args.report == "main":
         ]
     )
 
-    slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
-    error_tests += len(slow_on_client_rows)
-    addSimpleTable(
-        "Slow on Client",
-        ["Client time, s", "Server time, s", "Ratio", "Test", "Query"],
-        slow_on_client_rows,
-    )
-    if slow_on_client_rows:
-        errors_explained.append(
-            [
-                f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
-            ]
-        )
-
     def add_backward_incompatible():
         rows = tsvRows("report/partial-queries-report.tsv")
         if not rows:
@@ -63,7 +63,6 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml

start
@@ -94,7 +93,6 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml

start
@@ -131,6 +129,7 @@ sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
     | sed "s|>1<|>0<|g" \
     > /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
 sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
+rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
 
 start 500
 clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
docs/changelogs/v23.3.10.5-lts.md: 14 changes (new file)

@@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.3.10.5-lts (d8737007f9e) FIXME as compared to v23.3.9.55-lts (b9c5c8622d3)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
@@ -90,34 +90,117 @@ Process 1 stopped
 
 ## Visual Studio Code integration
 
-- [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging, the [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
-- Make sure to set the backend to your llvm installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
-- Launcher:
+- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
+- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
+- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
+- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
+
+### Example configurations
+#### cmake-variants.yaml
+```yaml
+buildType:
+  default: relwithdebinfo
+  choices:
+    debug:
+      short: Debug
+      long: Emit debug information
+      buildType: Debug
+    release:
+      short: Release
+      long: Optimize generated code
+      buildType: Release
+    relwithdebinfo:
+      short: RelWithDebInfo
+      long: Release with Debug Info
+      buildType: RelWithDebInfo
+    tsan:
+      short: MinSizeRel
+      long: Minimum Size Release
+      buildType: MinSizeRel
+
+toolchain:
+  default: default
+  description: Select toolchain
+  choices:
+    default:
+      short: x86_64
+      long: x86_64
+    s390x:
+      short: s390x
+      long: s390x
+      settings:
+        CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
+```
+
+#### launch.json
 ```json
 {
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Debug",
+            "name": "(lldb) Launch s390x with qemu",
             "type": "lldb",
             "request": "custom",
-            "targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"],
-            "processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"],
-            "sourceMap": { "${input:targetdir}": "${workspaceFolder}" },
+            "targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
+            "processCreateCommands": ["gdb-remote 2159"],
+            "preLaunchTask": "Run ClickHouse"
         }
-    ],
-    "inputs": [
-        {
-            "id": "targetdir",
-            "type": "command",
-            "command": "extension.commandvariable.transform",
-            "args": {
-                "text": "${command:cmake.launchTargetDirectory}",
-                "find": ".*/([^/]+)/[^/]+$",
-                "replace": "$1"
-            }
-        }
     ]
 }
 ```
-- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
+
+#### settings.json
+This would also put different builds under different subfolders of the `build` folder.
+```json
+{
+    "cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
+    "lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
+}
+```
+
+#### run-debug.sh
+```sh
+#! /bin/sh
+echo 'Starting debugger session'
+cd $1
+qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4
+```
+
+#### tasks.json
+Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`.
+```json
+{
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "Run ClickHouse",
+            "type": "shell",
+            "isBackground": true,
+            "command": "${workspaceFolder}/.vscode/run-debug.sh",
+            "args": [
+                "${command:cmake.launchTargetDirectory}/tmp",
+                "${command:cmake.launchTargetPath}",
+                "server",
+                "--config-file=${workspaceFolder}/programs/server/config.xml"
+            ],
+            "problemMatcher": [
+                {
+                    "pattern": [
+                        {
+                            "regexp": ".",
+                            "file": 1,
+                            "location": 2,
+                            "message": 3
+                        }
+                    ],
+                    "background": {
+                        "activeOnStart": true,
+                        "beginsPattern": "^Starting debugger session",
+                        "endsPattern": ".*"
+                    }
+                }
+            ]
+        }
+    ]
+}
+```
@@ -219,6 +219,10 @@ LIMIT N
 SETTINGS annoy_index_search_k_nodes=100;
 ```
 
+:::note
+The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
+[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
+:::
 ## USearch {#usearch}
 
 This type of ANN index is based on the [the USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
@@ -274,4 +278,4 @@ USearch currently supports two distance functions:
 ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
 
 For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
-distance function was specified during index creation, `L2Distance` is used as default.
+distance function was specified during index creation, `L2Distance` is used as default.
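As a quick illustration of the point above (a sketch only; table and column names are hypothetical, and the index syntax follows the USearch docs in this file), the distance function is fixed when the index is created:

``` sql
-- 'cosineDistance' is chosen explicitly here; omitting it would
-- fall back to the default, L2Distance.
CREATE TABLE vectors
(
    id UInt64,
    vec Array(Float32),
    INDEX ann_idx vec TYPE usearch('cosineDistance') GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;
```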
@@ -228,8 +228,8 @@ For most input formats schema inference reads some data to determine its structure
 To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache.
 
 There are special settings that control this cache:
-- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
-- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
+- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url/azure}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
+- `schema_inference_use_cache_for_{file,s3,hdfs,url,azure}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
 
 The schema of the file can be changed by modifying the data or by changing format settings.
 For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
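A sketch of how the query-level setting is used in practice (assuming a file `data.csv` in the server's `user_files` directory):

``` sql
-- The first call infers the schema and stores it in the cache.
DESC file('data.csv');

-- Disabling the cache for the file table function forces
-- schema inference to run again.
DESC file('data.csv') SETTINGS schema_inference_use_cache_for_file = 0;
```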
@@ -217,6 +217,14 @@ Type: UInt32
 
 Default: 1024
 
+## index_mark_cache_policy
+
+Index mark cache policy name.
+
+Type: String
+
+Default: SLRU
+
 ## index_mark_cache_size
 
 Size of cache for index marks. Zero means disabled.
@@ -229,6 +237,21 @@ Type: UInt64
 
 Default: 0
 
+## index_mark_cache_size_ratio
+
+The size of the protected queue in the index mark cache relative to the cache's total size.
+
+Type: Double
+
+Default: 0.5
+
+## index_uncompressed_cache_policy
+
+Index uncompressed cache policy name.
+
+Type: String
+
+Default: SLRU
+
 ## index_uncompressed_cache_size
 
@@ -242,6 +265,13 @@ Type: UInt64
 
 Default: 0
 
+## index_uncompressed_cache_size_ratio
+
+The size of the protected queue in the index uncompressed cache relative to the cache's total size.
+
+Type: Double
+
+Default: 0.5
+
 ## io_thread_pool_queue_size
 
@@ -271,6 +301,14 @@ Type: UInt64
 
 Default: 5368709120
 
+## mark_cache_size_ratio
+
+The size of the protected queue in the mark cache relative to the cache's total size.
+
+Type: Double
+
+Default: 0.5
+
 ## max_backup_bandwidth_for_server
 
 The maximum read speed in bytes per second for all backups on server. Zero means unlimited.
@@ -629,6 +667,14 @@ Type: UInt64
 
 Default: 0
 
+## uncompressed_cache_size_ratio
+
+The size of the protected queue in the uncompressed cache relative to the cache's total size.
+
+Type: Double
+
+Default: 0.5
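All of the `*_cache_size_ratio` settings above describe the fraction of an SLRU cache that is reserved for its protected queue. A quick way to inspect the effective values is a sketch like the following, assuming a server version that ships the `system.server_settings` table:

``` sql
-- Lists the cache policy and protected-queue ratio settings
-- together with their current values.
SELECT name, value
FROM system.server_settings
WHERE name LIKE '%cache_size_ratio' OR name LIKE '%cache_policy'
ORDER BY name;
```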

## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval}

The interval in seconds before reloading built-in dictionaries.
@@ -623,6 +623,19 @@ Possible values:
 
 Default value: false
 
+## number_of_free_entries_in_pool_to_execute_optimize_entire_partition {#number_of_free_entries_in_pool_to_execute_optimize_entire_partition}
+
+When there are fewer than the specified number of free entries in the pool, do not execute the optimization of an entire partition in the background (this task is generated when `min_age_to_force_merge_seconds` is set and `min_age_to_force_merge_on_partition_only` is enabled). This is to leave free threads for regular merges and to avoid "Too many parts".
+
+Possible values:
+
+- Positive integer.
+
+Default value: 25
+
+The value of the `number_of_free_entries_in_pool_to_execute_optimize_entire_partition` setting should be less than the value of [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
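For illustration, a sketch of a table definition that enables the whole-partition merge task and caps it with this setting (table and column names are hypothetical):

``` sql
CREATE TABLE events
(
    d Date,
    key UInt64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY key
SETTINGS
    min_age_to_force_merge_seconds = 3600,
    min_age_to_force_merge_on_partition_only = true,
    -- the whole-partition merge only runs while at least this many
    -- pool entries remain free for regular merges
    number_of_free_entries_in_pool_to_execute_optimize_entire_partition = 25;
```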

## allow_floating_point_partition_key {#allow_floating_point_partition_key}

Allows a floating-point number to be used as a partition key.
@@ -2383,6 +2383,23 @@ See also:
 
 - [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns)
 
+## optimize_count_from_files {#optimize_count_from_files}
+
+Enables or disables the optimization of counting the number of rows from files in different input formats. It applies to the table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
+
+Possible values:
+
+- 0 — Optimization disabled.
+- 1 — Optimization enabled.
+
+Default value: `1`.
+
+## use_cache_for_count_from_files {#use_cache_for_count_from_files}
+
+Enables caching of the number of rows while counting from files in the table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
+
+Enabled by default.
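A sketch of a query that exercises both settings (the file name is hypothetical). For formats that store row counts in their metadata, such as Parquet, the count can be answered without scanning the data, and the result is cached for repeated queries:

``` sql
SELECT count()
FROM file('hits.parquet')
SETTINGS optimize_count_from_files = 1, use_cache_for_count_from_files = 1;
```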

## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}

- Type: seconds
@@ -114,7 +114,11 @@ Example of disk configuration:
 
 ## Using local cache {#using-local-cache}
 
-It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy.
+It is possible to configure local cache over disks in storage configuration starting from version 22.3.
+For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
+For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS.
+Cache uses `LRU` cache policy.
 
 
 Example of configuration for versions later or equal to 22.8:
 
@@ -1794,6 +1794,330 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md).
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```

## arrayRotateLeft

Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.

**Syntax**

``` sql
arrayRotateLeft(arr, n)
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.

**Returned value**

- An array rotated to the left by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```

Result:

``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```

## arrayRotateRight

Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.

**Syntax**

``` sql
arrayRotateRight(arr, n)
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.

**Returned value**

- An array rotated to the right by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```

Result:

``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```

## arrayShiftLeft

Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.

**Syntax**

``` sql
arrayShiftLeft(arr, n[, default])
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.

**Returned value**

- An array shifted to the left by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```

Result:

``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```

Result:

``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```

Result:

``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```

## arrayShiftRight

Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.

**Syntax**

``` sql
arrayShiftRight(arr, n[, default])
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.

**Returned value**

- An array shifted to the right by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```

Result:

``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```

Query:

``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```

Result:

``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```

Result:

``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```

## Distance functions

All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).
@@ -91,7 +91,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
 
 ## Import Sample Dataset {#import-sample-dataset}
 
-Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
+Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized metric data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
 
 ### Download and Extract Table Data {#download-and-extract-table-data}
 
@@ -116,7 +116,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r
 2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md).
 3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed.
 
-Yandex.Metrica is a web analytics service, and sample dataset doesn’t cover its full functionality, so there are only two tables to create:
+There are only two tables to create:
 
 - `hits` is a table with each action done by all users on all websites covered by the service.
 - `visits` is a table that contains pre-built sessions instead of individual actions.
@@ -523,7 +523,7 @@ SELECT
     sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
     (100. * goal_visits) / visits AS goal_percent
 FROM tutorial.visits_v1
-WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
+WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403)
 ```
 
 ## Cluster Deployment {#cluster-deployment}
@@ -544,19 +544,19 @@ Example config for a cluster with three shards, one replica each:
 <perftest_3shards_1replicas>
     <shard>
         <replica>
-            <host>example-perftest01j.yandex.ru</host>
+            <host>example-perftest01j.clickhouse.com</host>
             <port>9000</port>
         </replica>
     </shard>
     <shard>
         <replica>
-            <host>example-perftest02j.yandex.ru</host>
+            <host>example-perftest02j.clickhouse.com</host>
             <port>9000</port>
         </replica>
    </shard>
    <shard>
        <replica>
-            <host>example-perftest03j.yandex.ru</host>
+            <host>example-perftest03j.clickhouse.com</host>
            <port>9000</port>
        </replica>
    </shard>
@@ -602,15 +602,15 @@ Example config for a cluster of one shard containing three replicas:
 <perftest_1shards_3replicas>
    <shard>
        <replica>
-            <host>example-perftest01j.yandex.ru</host>
+            <host>example-perftest01j.clickhouse.com</host>
            <port>9000</port>
        </replica>
        <replica>
-            <host>example-perftest02j.yandex.ru</host>
+            <host>example-perftest02j.clickhouse.com</host>
            <port>9000</port>
        </replica>
        <replica>
-            <host>example-perftest03j.yandex.ru</host>
+            <host>example-perftest03j.clickhouse.com</host>
            <port>9000</port>
        </replica>
    </shard>
@@ -628,15 +628,15 @@ ZooKeeper locations are specified in the configuration file:
 ``` xml
 <zookeeper>
     <node>
-        <host>zoo01.yandex.ru</host>
+        <host>zoo01.clickhouse.com</host>
         <port>2181</port>
     </node>
     <node>
-        <host>zoo02.yandex.ru</host>
+        <host>zoo02.clickhouse.com</host>
         <port>2181</port>
     </node>
     <node>
-        <host>zoo03.yandex.ru</host>
+        <host>zoo03.clickhouse.com</host>
         <port>2181</port>
     </node>
 </zookeeper>
@@ -1703,3 +1703,327 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r
│ 6 │ Float64 │
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```

## arrayRotateLeft

Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.

**Syntax**

``` sql
arrayRotateLeft(arr, n)
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate the array by.

**Returned value**

- An array rotated to the left by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```

Result:

``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```

## arrayRotateRight

Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.

**Syntax**

``` sql
arrayRotateRight(arr, n)
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate the array by.

**Returned value**

- An array rotated to the right by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```

Query:

``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```

Result:

``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```

## arrayShiftLeft

Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.

**Syntax**

``` sql
arrayShiftLeft(arr, n[, default])
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift the array by.
- `default` — Optional. Default value for the new elements.

**Returned value**

- An array shifted to the left by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```

Result:

``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```

Result:

``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```

Query:

``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```

Result:

``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```

## arrayShiftRight

Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.

**Syntax**

``` sql
arrayShiftRight(arr, n[, default])
```

**Arguments**

- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift the array by.
- `default` — Optional. Default value for the new elements.

**Returned value**

- An array shifted to the right by the specified number of elements.

Type: [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```

Result:

``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```

Result:

``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```

Result:

``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```

Query:

``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```

Result:

``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```

Query:

``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```

Result:

``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```
@ -1171,10 +1171,267 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引
|
||||
|
||||
主键键列之间的基数差得越大,主键中的列的顺序越重要。我们将在下一章节对此进行演示。
|
||||
|
||||
# 高效地为键列排序
|
||||
## 高效地为键列排序
|
||||
|
||||
TODO
|
||||
<a name="test"></a>
|
||||
|
||||
# 高效地识别单行
|
||||
|
||||
TODO
|
||||
在复合主键中,键列的顺序会对以下两方面产生重大影响:
|
||||
- 查询中过滤次关键字列的效率,以及
|
||||
- 表数据文件的压缩率。
|
||||
|
||||
为了演示这一点,我们将使用我们的[网络流量样本数据集(web traffic sample data set)](#数据集)这个版本,
|
||||
其中每一行包含三列,分别表示互联网用户(`UserID` 列)对 URL(`URL`列)的访问是否被标记为僵尸流量(`IsRobot` 列)。
|
||||
|
||||
我们将使用一个包含上述所有三列的复合主键,该主键可用于加快计算以下内容的典型网络分析查询速度
|
||||
- 特定 URL 有多少(百分比)流量来自机器人,或
|
||||
- 我们对特定用户是否为僵尸用户有多大把握(来自该用户的流量中有多大比例被认为是(或不是)僵尸流量)
|
||||
|
||||
我们使用该查询来计算我们要用作复合主键中三个列的基数(注意,我们使用 [URL 表函数](/docs/en/sql-reference/table-functions/url.md) 来即席查询 TSV 数据,而无需创建本地表)。在 `clickhouse client`中运行此查询:
|
||||
```sql
|
||||
SELECT
|
||||
formatReadableQuantity(uniq(URL)) AS cardinality_URL,
|
||||
formatReadableQuantity(uniq(UserID)) AS cardinality_UserID,
|
||||
formatReadableQuantity(uniq(IsRobot)) AS cardinality_IsRobot
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
c11::UInt64 AS UserID,
|
||||
c15::String AS URL,
|
||||
c20::UInt8 AS IsRobot
|
||||
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
|
||||
WHERE URL != ''
|
||||
)
|
||||
```
|
||||
响应如下:
|
||||
```response
|
||||
┌─cardinality_URL─┬─cardinality_UserID─┬─cardinality_IsRobot─┐
|
||||
│ 2.39 million │ 119.08 thousand │ 4.00 │
|
||||
└─────────────────┴────────────────────┴─────────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 118.334 sec. Processed 8.87 million rows, 15.88 GB (74.99 thousand rows/s., 134.21 MB/s.)
|
||||
```
|
||||
|
||||
我们可以看到,各列之间的基数,尤其是 `URL` 列和 `IsRobot` 列之间,存在着很大的差异,因此,在复合主键中,这些列的顺序对于有效加快对这些列的查询过滤速度,以及实现表中列数据文件的最佳压缩比都非常重要。
|
||||
|
||||
为了证明这一点,我们为僵尸流量分析数据创建了两个版本的表:
|
||||
- 带有复合主键`(URL、UserID、IsRobot)`的表 `hits_URL_UserID_IsRobot`,其中的键列按基数降序排列
|
||||
- 使用复合主键`(IsRobot, UserID, URL)` 创建表 `hits_IsRobot_UserID_URL`,其中的键列按基数升序排列
|
||||
|
||||
|
||||
创建具有复合主键`(URL、UserID、IsRobot)`的表 `hits_URL_UserID_IsRobot`:
|
||||
```sql
|
||||
CREATE TABLE hits_URL_UserID_IsRobot
|
||||
(
|
||||
`UserID` UInt32,
|
||||
`URL` String,
|
||||
`IsRobot` UInt8
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
// highlight-next-line
|
||||
PRIMARY KEY (URL, UserID, IsRobot);
|
||||
```
|
||||
|
||||
然后,填充887万行数据:
|
||||
```sql
|
||||
INSERT INTO hits_URL_UserID_IsRobot SELECT
|
||||
intHash32(c11::UInt64) AS UserID,
|
||||
c15 AS URL,
|
||||
c20 AS IsRobot
|
||||
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
|
||||
WHERE URL != '';
|
||||
```
|
||||
响应如下:
|
||||
```response
|
||||
0 rows in set. Elapsed: 104.729 sec. Processed 8.87 million rows, 15.88 GB (84.73 thousand rows/s., 151.64 MB/s.)
|
||||
```
|
||||
|
||||
|
||||
接下来,创建带有复合主键 `(IsRobot,UserID,URL)`的表 `hits_IsRobot_UserID_URL`:
|
||||
```sql
|
||||
CREATE TABLE hits_IsRobot_UserID_URL
|
||||
(
|
||||
`UserID` UInt32,
|
||||
`URL` String,
|
||||
`IsRobot` UInt8
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
// highlight-next-line
|
||||
PRIMARY KEY (IsRobot, UserID, URL);
|
||||
```
|
||||
并在其中填入与上一个表相同的 887 万行数据:
|
||||
|
||||
```sql
|
||||
INSERT INTO hits_IsRobot_UserID_URL SELECT
|
||||
intHash32(c11::UInt64) AS UserID,
|
||||
c15 AS URL,
|
||||
c20 AS IsRobot
|
||||
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
|
||||
WHERE URL != '';
|
||||
```
|
||||
响应如下:
|
||||
```response
|
||||
0 rows in set. Elapsed: 95.959 sec. Processed 8.87 million rows, 15.88 GB (92.48 thousand rows/s., 165.50 MB/s.)
|
||||
```
|
||||
|
||||
|
||||
|
||||
### 在次关键字列上高效过滤
|
||||
|
||||
当查询对至少一列进行过滤时,该列是复合关键字的一部分,并且是第一关键字列,[那么 ClickHouse 将在关键字列的索引标记上运行二分查找算法](#主索引被用来选择颗粒)。
|
||||
|
||||
当查询(仅)过滤属于复合关键字的某一列,但不是第一关键字列时,[ClickHouse 将在关键字列的索引标记上使用通用排除搜索算法](#查询使用第二位主键的性能问题)。
|
||||
|
||||
|
||||
对于第二种情况,复合主键中关键列的排序对[通用排除搜索算法](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444)的有效性很重要。
|
||||
|
||||
这是一个对表中的 `UserID` 列进行过滤的查询,我们对该表的关键字列`(URL、UserID、IsRobot)`按基数进行了降序排序:
|
||||
```sql
|
||||
SELECT count(*)
|
||||
FROM hits_URL_UserID_IsRobot
|
||||
WHERE UserID = 112304
|
||||
```
|
||||
响应如下:
|
||||
```response
|
||||
┌─count()─┐
|
||||
│ 73 │
|
||||
└─────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.026 sec.
|
||||
// highlight-next-line
|
||||
Processed 7.92 million rows,
|
||||
31.67 MB (306.90 million rows/s., 1.23 GB/s.)
|
||||
```
|
||||
|
||||
对关键字列`(IsRobot, UserID, URL)`按基数升序排列的表,进行相同的查询:
|
||||
```sql
|
||||
SELECT count(*)
|
||||
FROM hits_IsRobot_UserID_URL
|
||||
WHERE UserID = 112304
|
||||
```
|
||||
响应如下:
|
||||
```response
|
||||
┌─count()─┐
|
||||
│ 73 │
|
||||
└─────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.003 sec.
|
||||
// highlight-next-line
|
||||
Processed 20.32 thousand rows,
|
||||
81.28 KB (6.61 million rows/s., 26.44 MB/s.)
|
||||
```
|
||||
|
||||
我们可以看到,在对关键列按基数进行升序排列的表中,查询执行的效率和速度明显更高。
|
||||
|
||||
其原因是,当通过具有较低基数前键列的次关键字列选择[颗粒](#主索引被用来选择颗粒)时, [通用排除搜索算法](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444)最有效。 我们在本指南的[上一节](#generic-exclusion-search-algorithm)中对此进行了详细说明。
|
||||
|
||||
|
||||
### 数据文件的最佳压缩率
|
||||
|
||||
此查询将比较上面创建的两个表中 `UserID` 列的压缩率:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
table AS Table,
|
||||
name AS Column,
|
||||
formatReadableSize(data_uncompressed_bytes) AS Uncompressed,
|
||||
formatReadableSize(data_compressed_bytes) AS Compressed,
|
||||
round(data_uncompressed_bytes / data_compressed_bytes, 0) AS Ratio
|
||||
FROM system.columns
|
||||
WHERE (table = 'hits_URL_UserID_IsRobot' OR table = 'hits_IsRobot_UserID_URL') AND (name = 'UserID')
|
||||
ORDER BY Ratio ASC
|
||||
```
|
||||
这是响应:
|
||||
```response
|
||||
┌─Table───────────────────┬─Column─┬─Uncompressed─┬─Compressed─┬─Ratio─┐
|
||||
│ hits_URL_UserID_IsRobot │ UserID │ 33.83 MiB │ 11.24 MiB │ 3 │
|
||||
│ hits_IsRobot_UserID_URL │ UserID │ 33.83 MiB │ 877.47 KiB │ 39 │
|
||||
└─────────────────────────┴────────┴──────────────┴────────────┴───────┘
|
||||
|
||||
2 rows in set. Elapsed: 0.006 sec.
|
||||
```
|
||||
我们可以看到,在按关键字列`(IsRobot、UserID、URL)` 按基数升序排列的表中,`UserID` 列的压缩率明显更高。
|
||||
|
||||
虽然两个表中存储的数据完全相同(我们在两个表中插入了相同的 887 万行),但复合主键中关键字列的顺序对表的 [列数据文件](#数据按照主键排序存储在磁盘上)中的 <a href="https://clickhouse.com/docs/en/introduction/distinctive-features/#data-compression" target="_blank">压缩</a>数据所需的磁盘空间有很大影响:
|
||||
- 在具有复合主键`(URL, UserID, IsRobot)` 的表 `hits_URL_UserID_IsRobot` 中,我们按照键列的基数降序排列,此时 `UserID.bin` 数据文件占用**11.24MB**的磁盘空间。
|
||||
- 在具有复合主键`(IsRobot, UserID, URL)` 的表 `hits_IsRobot_UserID_URL` 中,我们按照键列的基数升序排列,`UserID.bin` 数据文件仅占用**877.47 KiB**的磁盘空间。
|
||||
|
||||
对磁盘上表的列数据进行良好的压缩比不仅能节省磁盘空间,还能使需要从该列读取数据的查询(尤其是分析查询)更快,因为将列数据从磁盘移动到主内存(操作系统的文件缓存)所需的 i/o 更少。
|
||||
|
||||
下面我们将说明,为什么主键列按基数升序排列有利于提高表列的压缩率。
|
||||
|
||||
下图阐述了主键的磁盘上行顺序,其中键列是按基数升序排列的:
|
||||
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-14a.png').default} class="image"/>
|
||||
|
||||
我们讨论过 [表的行数据按主键列有序存储在磁盘上](#数据按照主键排序存储在磁盘上)。
|
||||
|
||||
在上图中,表格的行(它们在磁盘上的列值)首先按其 `cl` 值排序,具有相同 `cl` 值的行按其 `ch` 值排序。由于第一键列 `cl` 的基数较低,因此很可能存在具有相同 `cl` 值的行。因此,`ch`值也很可能是有序的(局部地--对于具有相同`cl`值的行而言)。
|
||||
|
||||
如果在一列中,相似的数据被放在彼此相近的位置,例如通过排序,那么这些数据将得到更好的压缩。
|
||||
一般来说,压缩算法会受益于数据的运行长度(可见的数据越多,压缩效果越好)和局部性(数据越相似,压缩率越高)。
|
||||
|
||||
与上图不同的是,下图阐述了主键的磁盘上行顺序,其中主键列是按基数降序排列的:
|
||||
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-14b.png').default} class="image"/>
|
||||
|
||||
现在,表格的行首先按其 `ch` 值排序,具有相同 `ch` 值的行按其 `cl` 值排序。
|
||||
但是,由于第一键列 `ch` 的基数很高,因此不太可能存在具有相同 `ch` 值的行。因此,`cl`值也不太可能是有序的(局部地--对于具有相同`ch`值的行而言)。
|
||||
|
||||
因此,`cl`值很可能是随机排序的,因此局部性和压缩比都很差。
|
||||
|
||||
|
||||
### 小结
|
||||
|
||||
为了在查询中有效地过滤次关键字列和提高表列数据文件的压缩率,按基数升序排列主键中的列是有益的。
|
||||
|
||||
|
||||
### 相关内容
|
||||
- 博客: [Super charging your ClickHouse queries](https://clickhouse.com/blog/clickhouse-faster-queries-with-projections-and-primary-indexes)
|
||||
|
||||
|
||||
## Identifying single rows efficiently

Although in general it is [not](/knowledgebase/key-value) a best use case for ClickHouse, applications built on top of ClickHouse sometimes need to identify single rows of a ClickHouse table.

An intuitive solution for that might be to use a [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) column with a unique value per row, and to use that column as a primary key column for fast retrieval of rows.

For the fastest retrieval, the UUID column [would need to be a primary key column](#主索引被用来选择颗粒).

We discussed that because [a ClickHouse table's row data is stored on disk ordered by primary key column(s)](#数据按照主键排序存储在磁盘上), having a very high cardinality column (like a UUID column) in a primary key, or in a compound primary key before columns with lower cardinality, [is detrimental to the compression ratio of other table columns](#数据文件的最佳压缩率).

A compromise between fastest retrieval and optimal data compression is to use a compound primary key where the UUID is the last key column, after low(er) cardinality key columns that are used to ensure a good compression ratio for some of the table's columns.
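A minimal sketch of that compromise (the table and column names here are hypothetical illustrations, not taken from this guide):

```sql
-- low-cardinality key columns first, the unique id last
CREATE TABLE events
(
    tenant     LowCardinality(String),
    event_type LowCardinality(String),
    id         UUID,
    payload    String
)
ENGINE = MergeTree
ORDER BY (tenant, event_type, id);

-- retrieval stays fast when the query also pins down the prefix columns:
SELECT payload
FROM events
WHERE tenant = 't1' AND event_type = 'click'
  AND id = '61f0c404-5cb3-11e7-907b-a6006ad3dba0';
```

Rows are sorted by `tenant` and `event_type` first, so those columns (and columns correlated with them) compress well, while the `id` column at the end of the key still lets the primary index narrow a point lookup down to a few granules.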
### A concrete example

One concrete example is the plain text paste service https://pastila.nl that Alexey Milovidov developed and [blogged about](https://clickhouse.com/blog/building-a-paste-service-with-clickhouse/).

On every change to the text area, the data is saved automatically into a ClickHouse table row (one row per change).

One way to identify and retrieve (a specific version of) the pasted content is to use a hash of the content as the UUID for the table row that contains the content.
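As a sketch of that first approach (an illustrative schema with hypothetical names, not the service's actual table definition):

```sql
CREATE TABLE paste_v1
(
    hash    UInt64,   -- e.g. cityHash64(content), unique per version of the content
    content String
)
ENGINE = MergeTree
ORDER BY hash;

INSERT INTO paste_v1 SELECT cityHash64('hello world'), 'hello world';
SELECT content FROM paste_v1 WHERE hash = cityHash64('hello world');
```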
The following diagram shows
- the insert order of rows when the content changes (for example due to keystrokes typing the text into the text area), and
- the on-disk order of the data from the inserted rows when `PRIMARY KEY (hash)` is used:

<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-15a.png').default} class="image"/>

Because the `hash` column is used as the primary key column,
- specific rows can be retrieved [very quickly](#主索引被用来选择颗粒), but
- the table's rows (their column data) are stored on disk ordered ascending by the (unique and random) hash values. Therefore also the values of the content column are stored in random order with no data locality, resulting in a **suboptimal compression ratio for the content column data file**.
In order to significantly improve the compression ratio for the content column while still achieving fast retrieval of specific rows, pastila.nl uses two hashes (and a compound primary key) for identifying a specific row (a sketch of the idea follows at the end of this section):
- a hash of the content that, as discussed above, is distinct for distinct data, and
- a [locality-sensitive hash (fingerprint)](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) that does **not** change on small changes of data.

The following diagram shows
- the insert order of rows when the content changes (for example due to keystrokes typing the text into the text area), and
- the on-disk order of the data from the inserted rows when the compound primary key `(fingerprint, hash)` is used:

<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-15b.png').default} class="image"/>

Now the rows on disk are first ordered by `fingerprint`, and for rows with the same fingerprint value, their `hash` value determines the final order.

Because data that differs only in small changes gets the same fingerprint value, similar data is now stored close to each other in the content column on disk. That is very good for the compression ratio of the content column, as a compression algorithm in general benefits from data locality (the more similar the data is, the better the compression ratio).

The compromise is that two fields (`fingerprint` and `hash`) are required to retrieve a specific row while optimally utilizing the primary index that results from the compound primary key `(fingerprint, hash)`.
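A minimal sketch of the two-hash idea (hypothetical table, column names, and hash choices, not pastila.nl's actual schema):

```sql
CREATE TABLE paste_v2
(
    fingerprint UInt32,   -- locality-sensitive hash, stable under small edits
    hash        UInt64,   -- exact content hash, distinct per version
    content     String
)
ENGINE = MergeTree
ORDER BY (fingerprint, hash);

-- retrieving a specific version needs both key fields,
-- so the primary index can be used for both key columns:
SELECT content
FROM paste_v2
WHERE fingerprint = 314 AND hash = cityHash64('some pasted text');
```

Because edits that change only a few characters keep the same `fingerprint`, successive versions of a paste end up adjacent on disk, restoring the data locality that the pure hash key destroyed.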
@ -5,6 +5,7 @@
#include <fstream>
#include <iomanip>
#include <random>
#include <string_view>
#include <pcg_random.hpp>
#include <Poco/Util/Application.h>
#include <Common/Stopwatch.h>

@ -48,6 +49,7 @@ namespace DB
{

using Ports = std::vector<UInt16>;
static constexpr std::string_view DEFAULT_CLIENT_NAME = "benchmark";

namespace ErrorCodes
{

@ -122,7 +124,7 @@ public:
        default_database_, user_, password_, quota_key_,
        /* cluster_= */ "",
        /* cluster_secret_= */ "",
        /* client_name_= */ "benchmark",
        /* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
        Protocol::Compression::Enable,
        secure));

@ -135,6 +137,8 @@ public:

    global_context->makeGlobalContext();
    global_context->setSettings(settings);
    global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
    global_context->setQueryKindInitial();

    std::cerr << std::fixed << std::setprecision(3);

@ -1243,6 +1243,7 @@ void Client::processConfig()
            global_context->getSettingsRef().max_insert_block_size);
    }

    global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
    global_context->setQueryKindInitial();
    global_context->setQuotaClientKey(config().getString("quota_key", ""));
    global_context->setQueryKind(query_kind);

@ -133,8 +133,6 @@ if (BUILD_STANDALONE_KEEPER)
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp

    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp

@ -657,21 +657,23 @@ void LocalServer::processConfig()
    /// There is no need for concurrent queries, override max_concurrent_queries.
    global_context->getProcessList().setMaxSize(0);

    const size_t memory_amount = getMemoryAmount();
    const size_t physical_server_memory = getMemoryAmount();
    const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
    const size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
    const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);

    String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
    size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
    double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
    if (uncompressed_cache_size > max_cache_size)
    {
        uncompressed_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
    global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);

    String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
    size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
    double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
    if (!mark_cache_size)
        LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
    if (mark_cache_size > max_cache_size)

@ -679,23 +681,27 @@
        mark_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
    }
    global_context->setMarkCache(mark_cache_policy, mark_cache_size);
    global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);

    String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
    size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
    double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
    if (index_uncompressed_cache_size > max_cache_size)
    {
        index_uncompressed_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
    global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);

    String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
    size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
    double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
    if (index_mark_cache_size > max_cache_size)
    {
        index_mark_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setIndexMarkCache(index_mark_cache_size);
    global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);

    size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
    if (mmap_cache_size > max_cache_size)

@ -1302,18 +1302,14 @@ try

    if (structure.empty())
    {
        ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &)
        {
            auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
        auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);

            /// stdin must be seekable
            auto res = lseek(file->getFD(), 0, SEEK_SET);
            if (-1 == res)
                throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

            return file;
        };
        /// stdin must be seekable
        auto res = lseek(file->getFD(), 0, SEEK_SET);
        if (-1 == res)
            throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

        SingleReadBufferIterator read_buffer_iterator(std::move(file));
        schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const);
    }
    else

@ -1111,37 +1111,43 @@ try

    String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
    size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
    double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
    if (uncompressed_cache_size > max_cache_size)
    {
        uncompressed_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
    global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);

    String mark_cache_policy = server_settings.mark_cache_policy;
    size_t mark_cache_size = server_settings.mark_cache_size;
    double mark_cache_size_ratio = server_settings.mark_cache_size_ratio;
    if (mark_cache_size > max_cache_size)
    {
        mark_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
    }
    global_context->setMarkCache(mark_cache_policy, mark_cache_size);
    global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);

    String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
    size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
    double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
    if (index_uncompressed_cache_size > max_cache_size)
    {
        index_uncompressed_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
    global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);

    String index_mark_cache_policy = server_settings.index_mark_cache_policy;
    size_t index_mark_cache_size = server_settings.index_mark_cache_size;
    double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio;
    if (index_mark_cache_size > max_cache_size)
    {
        index_mark_cache_size = max_cache_size;
        LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setIndexMarkCache(index_mark_cache_size);
    global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);

    size_t mmap_cache_size = server_settings.mmap_cache_size;
    if (mmap_cache_size > max_cache_size)

@ -153,6 +153,7 @@ enum class AccessType
    M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
    M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
    M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
    M(SYSTEM_SYNC_FILESYSTEM_CACHE, "SYSTEM REPAIR FILESYSTEM CACHE, REPAIR FILESYSTEM CACHE, SYNC FILESYSTEM CACHE", GLOBAL, SYSTEM) \
    M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
    M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
    M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \

@ -100,8 +100,8 @@ private:
    struct Constraint
    {
        SettingConstraintWritability writability = SettingConstraintWritability::WRITABLE;
        Field min_value;
        Field max_value;
        Field min_value{};
        Field max_value{};

        bool operator ==(const Constraint & other) const;
        bool operator !=(const Constraint & other) const { return !(*this == other); }

@ -547,7 +547,7 @@ public:

        /// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
        Int32 size_to_write = size ? size : -1;
        writeBinary(size_to_write, buf);
        writeBinaryLittleEndian(size_to_write, buf);
        if (has())
            buf.write(getData(), size);
    }

@ -573,7 +573,7 @@ public:
    {
        /// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
        Int32 rhs_size_signed;
        readBinary(rhs_size_signed, buf);
        readBinaryLittleEndian(rhs_size_signed, buf);

        if (rhs_size_signed < 0)
        {

@ -258,12 +258,12 @@ struct AggregateFunctionSumData

    void write(WriteBuffer & buf) const
    {
        writeBinary(sum, buf);
        writeBinaryLittleEndian(sum, buf);
    }

    void read(ReadBuffer & buf)
    {
        readBinary(sum, buf);
        readBinaryLittleEndian(sum, buf);
    }

    T get() const

@ -10,6 +10,8 @@

#include <Parsers/ASTWithAlias.h>

#include <boost/functional/hash.hpp>

namespace DB
{

@ -1918,6 +1918,9 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
    subquery_settings.max_result_rows = 1;
    subquery_settings.extremes = false;
    subquery_context->setSettings(subquery_settings);
    /// When execute `INSERT INTO t WITH ... SELECT ...`, it may lead to `Unknown columns`
    /// exception with this settings enabled(https://github.com/ClickHouse/ClickHouse/issues/52494).
    subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false);

    auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/);
    auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node->toAST(), subquery_context, options);

@ -50,7 +50,8 @@ namespace
        context->getRemoteHostFilter(),
        static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
        context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
        /* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler);
        /* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler,
        s3_uri.uri.getScheme());

    client_configuration.endpointOverride = s3_uri.endpoint;
    client_configuration.maxConnections = static_cast<unsigned>(context->getSettingsRef().s3_max_connections);

@ -169,6 +169,10 @@ if (TARGET ch_contrib::jemalloc)
    target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()

if (TARGET ch_contrib::azure_sdk)
    target_link_libraries(clickhouse_storages_system PRIVATE ch_contrib::azure_sdk)
endif()

target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)

add_subdirectory(Access/Common)

@ -1,5 +1,6 @@
#pragma once

#include <string_view>
#include "Common/NamePrompter.h"
#include <Parsers/ASTCreateQuery.h>
#include <Common/ProgressIndication.h>

@ -24,6 +25,7 @@ namespace po = boost::program_options;
namespace DB
{

static constexpr std::string_view DEFAULT_CLIENT_NAME = "client";

static const NameSet exit_strings
{

@ -12,6 +12,7 @@
#include <IO/TimeoutSetter.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <Client/ClientBase.h>
#include <Client/Connection.h>
#include <Client/ConnectionParameters.h>
#include <Common/ClickHouseRevision.h>

@ -1204,7 +1205,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa
        parameters.quota_key,
        "", /* cluster */
        "", /* cluster_secret */
        "client",
        std::string(DEFAULT_CLIENT_NAME),
        parameters.compression,
        parameters.security);
}

@ -559,6 +559,7 @@ FieldInfo ColumnObject::Subcolumn::getFieldInfo() const
        .have_nulls = base_type->isNullable(),
        .need_convert = false,
        .num_dimensions = least_common_type.getNumberOfDimensions(),
        .need_fold_dimension = false,
    };
}

@ -182,6 +182,7 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
            init.max_threads,
            /* max_free_threads = */ 0,
            init.max_threads),
        .ready_queue = {},
        .max_threads = init.max_threads
    });
}

@ -40,14 +40,17 @@ public:
    using MappedPtr = typename CachePolicy::MappedPtr;
    using KeyMapped = typename CachePolicy::KeyMapped;

    /// Use this ctor if you don't care about the internal cache policy.
    explicit CacheBase(size_t max_size_in_bytes, size_t max_count = 0, double size_ratio = 0.5)
    static constexpr auto NO_MAX_COUNT = 0uz;
    static constexpr auto DEFAULT_SIZE_RATIO = 0.5l;

    /// Use this ctor if you only care about the cache size but not internals like the cache policy.
    explicit CacheBase(size_t max_size_in_bytes, size_t max_count = NO_MAX_COUNT, double size_ratio = DEFAULT_SIZE_RATIO)
        : CacheBase("SLRU", max_size_in_bytes, max_count, size_ratio)
    {
    }

    /// Use this ctor if you want the user to configure the cache policy via some setting. Supports only general-purpose policies LRU and SLRU.
    explicit CacheBase(std::string_view cache_policy_name, size_t max_size_in_bytes, size_t max_count = 0, double size_ratio = 0.5)
    /// Use this ctor if the user should be able to configure the cache policy and cache sizes via settings. Supports only general-purpose policies LRU and SLRU.
    explicit CacheBase(std::string_view cache_policy_name, size_t max_size_in_bytes, size_t max_count, double size_ratio)
    {
        auto on_weight_loss_function = [&](size_t weight_loss) { onRemoveOverflowWeightLoss(weight_loss); };

@ -79,7 +82,7 @@ public:
    MappedPtr get(const Key & key)
    {
        std::lock_guard lock(mutex);
        auto res = cache_policy->get(key, lock);
        auto res = cache_policy->get(key);
        if (res)
            ++hits;
        else

@ -90,7 +93,7 @@ public:
    std::optional<KeyMapped> getWithKey(const Key & key)
    {
        std::lock_guard lock(mutex);
        auto res = cache_policy->getWithKey(key, lock);
        auto res = cache_policy->getWithKey(key);
        if (res.has_value())
            ++hits;
        else

@ -101,7 +104,7 @@ public:
    void set(const Key & key, const MappedPtr & mapped)
    {
        std::lock_guard lock(mutex);
        cache_policy->set(key, mapped, lock);
        cache_policy->set(key, mapped);
    }

    /// If the value for the key is in the cache, returns it. If it is not, calls load_func() to

@ -118,7 +121,7 @@ public:
        InsertTokenHolder token_holder;
        {
            std::lock_guard cache_lock(mutex);
            auto val = cache_policy->get(key, cache_lock);
            auto val = cache_policy->get(key);
            if (val)
            {
                ++hits;

@ -156,7 +159,7 @@ public:
            auto token_it = insert_tokens.find(key);
            if (token_it != insert_tokens.end() && token_it->second.get() == token)
            {
                cache_policy->set(key, token->value, cache_lock);
                cache_policy->set(key, token->value);
                result = true;
            }

@ -185,49 +188,49 @@ public:
        insert_tokens.clear();
        hits = 0;
        misses = 0;
        cache_policy->clear(lock);
        cache_policy->clear();
    }

    void remove(const Key & key)
    {
        std::lock_guard lock(mutex);
        cache_policy->remove(key, lock);
        cache_policy->remove(key);
    }

    size_t weight() const
    size_t sizeInBytes() const
    {
        std::lock_guard lock(mutex);
        return cache_policy->weight(lock);
        return cache_policy->sizeInBytes();
    }

    size_t count() const
    {
        std::lock_guard lock(mutex);
        return cache_policy->count(lock);
        return cache_policy->count();
    }

    size_t maxSize() const
    size_t maxSizeInBytes() const
    {
        std::lock_guard lock(mutex);
        return cache_policy->maxSize(lock);
        return cache_policy->maxSizeInBytes();
    }

    void setMaxCount(size_t max_count)
    {
        std::lock_guard lock(mutex);
        cache_policy->setMaxCount(max_count, lock);
        cache_policy->setMaxCount(max_count);
    }

    void setMaxSize(size_t max_size_in_bytes)
    void setMaxSizeInBytes(size_t max_size_in_bytes)
    {
        std::lock_guard lock(mutex);
        cache_policy->setMaxSize(max_size_in_bytes, lock);
        cache_policy->setMaxSizeInBytes(max_size_in_bytes);
    }

    void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
    {
        std::lock_guard lock(mutex);
        cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries, lock);
        cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries);
    }

    virtual ~CacheBase() = default;
74 src/Common/EnvironmentProxyConfigurationResolver.cpp Normal file
@ -0,0 +1,74 @@
#include "EnvironmentProxyConfigurationResolver.h"

#include <Common/logger_useful.h>
#include <Poco/URI.h>

namespace DB
{

/*
 * Usually environment variables are upper-case, but it seems like proxy related variables are an exception.
 * See https://unix.stackexchange.com/questions/212894/whats-the-right-format-for-the-http-proxy-environment-variable-caps-or-no-ca/212972#212972
 * */
static constexpr auto PROXY_HTTP_ENVIRONMENT_VARIABLE = "http_proxy";
static constexpr auto PROXY_HTTPS_ENVIRONMENT_VARIABLE = "https_proxy";

EnvironmentProxyConfigurationResolver::EnvironmentProxyConfigurationResolver(Protocol protocol_)
    : protocol(protocol_)
{}

namespace
{
    const char * getProxyHost(DB::ProxyConfiguration::Protocol protocol)
    {
        /*
         * getenv is safe to use here because ClickHouse code does not make any call to `setenv` or `putenv`
         * aside from tests and a very early call during startup: https://github.com/ClickHouse/ClickHouse/blob/master/src/Daemon/BaseDaemon.cpp#L791
         * */

        if (protocol == DB::ProxyConfiguration::Protocol::HTTP)
        {
            return std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
        }
        else if (protocol == DB::ProxyConfiguration::Protocol::HTTPS)
        {
            return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
        }
        else
        {
            if (const char * http_proxy_host = std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE)) // NOLINT(concurrency-mt-unsafe)
            {
                return http_proxy_host;
            }
            else
            {
                return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
            }
        }
    }
}

ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve()
{
    const auto * proxy_host = getProxyHost(protocol);

    if (!proxy_host)
    {
        return {};
    }

    auto uri = Poco::URI(proxy_host);
    auto host = uri.getHost();
    auto scheme = uri.getScheme();
    auto port = uri.getPort();

    LOG_TRACE(&Poco::Logger::get("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port);

    return ProxyConfiguration {
        host,
        ProxyConfiguration::protocolFromString(scheme),
        port
    };
}

}

23 src/Common/EnvironmentProxyConfigurationResolver.h Normal file
@ -0,0 +1,23 @@
#pragma once

#include <Common/ProxyConfigurationResolver.h>

namespace DB
{

/*
 * Grabs proxy configuration from environment variables (http_proxy and https_proxy).
 * */
class EnvironmentProxyConfigurationResolver : public ProxyConfigurationResolver
{
public:
    explicit EnvironmentProxyConfigurationResolver(Protocol protocol_);

    ProxyConfiguration resolve() override;
    void errorReport(const ProxyConfiguration &) override {}

private:
    Protocol protocol;
};

}

@ -584,6 +584,7 @@
    M(699, INVALID_REDIS_TABLE_STRUCTURE) \
    M(700, USER_SESSION_LIMIT_EXCEEDED) \
    M(701, CLUSTER_DOESNT_EXIST) \
    M(702, CLIENT_INFO_DOES_NOT_MATCH) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \

@ -69,11 +69,11 @@ struct FunctionDocumentation
    using Related = std::string;

    Description description;        /// E.g. "Returns the position (in bytes, starting at 1) of a substring needle in a string haystack."
    Syntax syntax;                  /// E.g. "position(haystack, needle)"
    Arguments arguments;            /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
    ReturnedValue returned_value;   /// E.g. "Starting position in bytes and counting from 1, if the substring was found."
    Examples examples;              ///
    Categories categories;          /// E.g. {"String Search"}
    Syntax syntax = {};             /// E.g. "position(haystack, needle)"
    Arguments arguments {};         /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
    ReturnedValue returned_value {};/// E.g. "Starting position in bytes and counting from 1, if the substring was found."
    Examples examples {};           ///
    Categories categories {};       /// E.g. {"String Search"}

    std::string argumentsAsString() const;
    std::string examplesAsString() const;

@ -37,25 +37,25 @@ public:
    explicit ICachePolicy(CachePolicyUserQuotaPtr user_quotas_) : user_quotas(std::move(user_quotas_)) {}
    virtual ~ICachePolicy() = default;

    virtual size_t weight(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
    virtual size_t count(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
    virtual size_t maxSize(std::lock_guard<std::mutex>& /*cache_lock*/) const = 0;
    virtual size_t sizeInBytes() const = 0;
    virtual size_t count() const = 0;
    virtual size_t maxSizeInBytes() const = 0;

    virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
    virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
    virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries, std::lock_guard<std::mutex> & /*cache_lock*/) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
    virtual void setMaxCount(size_t /*max_count*/) = 0;
    virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0;
    virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }

    /// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
    /// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.
    /// Then use getWithKey() to also return the found key including it's non-hashed data.
    virtual MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
    virtual std::optional<KeyMapped> getWithKey(const Key &, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
    virtual MappedPtr get(const Key & key) = 0;
    virtual std::optional<KeyMapped> getWithKey(const Key &) = 0;

    virtual void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
    virtual void set(const Key & key, const MappedPtr & mapped) = 0;

    virtual void remove(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
    virtual void remove(const Key & key) = 0;

    virtual void clear(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
    virtual void clear() = 0;
    virtual std::vector<KeyMapped> dump() const = 0;

protected:

@ -34,41 +34,41 @@ public:
    {
    }

    size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
    size_t sizeInBytes() const override
    {
        return current_size_in_bytes;
    }

    size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
    size_t count() const override
    {
        return cells.size();
    }

    size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
    size_t maxSizeInBytes() const override
    {
        return max_size_in_bytes;
    }

    void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
    void setMaxCount(size_t max_count_) override
    {
        max_count = max_count_;
        removeOverflow();
    }

    void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
    void setMaxSizeInBytes(size_t max_size_in_bytes_) override
    {
        max_size_in_bytes = max_size_in_bytes_;
        removeOverflow();
    }

    void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
    void clear() override
    {
        queue.clear();
        cells.clear();
        current_size_in_bytes = 0;
    }

    void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
    void remove(const Key & key) override
    {
        auto it = cells.find(key);
        if (it == cells.end())

@ -79,7 +79,7 @@ public:
        cells.erase(it);
    }

    MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
    MappedPtr get(const Key & key) override
    {
        auto it = cells.find(key);
        if (it == cells.end())

@ -93,7 +93,7 @@ public:
        return cell.value;
    }

    std::optional<KeyMapped> getWithKey(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) override
    std::optional<KeyMapped> getWithKey(const Key & key) override
    {
        auto it = cells.find(key);
        if (it == cells.end())

@ -107,7 +107,7 @@ public:
        return std::make_optional<KeyMapped>({it->first, cell.value});
    }

    void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
    void set(const Key & key, const MappedPtr & mapped) override
    {
        auto [it, inserted] = cells.emplace(std::piecewise_construct,
            std::forward_as_tuple(key),

@ -34,8 +34,8 @@ public:
    StorageID table_id = StorageID::createEmpty();
    bool ignore_unknown = false;
    bool expand_special_macros_only = false;
    std::optional<String> shard;
    std::optional<String> replica;
    std::optional<String> shard = {};
    std::optional<String> replica = {};

    /// Information about macro expansion
    size_t level = 0;

@ -258,6 +258,8 @@ The server successfully detected this situation and will download merged part fr
    M(RWLockReadersWaitMilliseconds, "Total time spent waiting for a read lock to be acquired (in a heavy RWLock).") \
    M(RWLockWritersWaitMilliseconds, "Total time spent waiting for a write lock to be acquired (in a heavy RWLock).") \
    M(DNSError, "Total count of errors in DNS resolution") \
    M(PartsLockHoldMicroseconds, "Total time spent holding data parts lock in MergeTree tables") \
    M(PartsLockWaitMicroseconds, "Total time spent waiting for data parts lock in MergeTree tables") \
    \
    M(RealTimeMicroseconds, "Total (wall clock) time spent in processing (queries and other tasks) threads (note that this is a sum).") \
    M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user mode. This include time CPU pipeline was stalled due to main memory access, cache misses, branch mispredictions, hyper-threading, etc.") \

@ -366,6 +368,8 @@ The server successfully detected this situation and will download merged part fr
    M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
    M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
    \
    M(EngineFileLikeReadFiles, "Number of files read in table engines working with files (like File/S3/URL/HDFS).") \
    \
    M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \
    M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \
    M(ReadBufferFromS3Bytes, "Bytes read from S3.") \

@ -391,8 +395,11 @@ The server successfully detected this situation and will download merged part fr
    M(CachedWriteBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \
    M(CachedWriteBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \
    \
    M(FilesystemCacheLoadMetadataMicroseconds, "Time spent loading filesystem cache metadata") \
    M(FilesystemCacheEvictedBytes, "Number of bytes evicted from filesystem cache") \
    M(FilesystemCacheEvictedFileSegments, "Number of file segments evicted from filesystem cache") \
    M(FilesystemCacheEvictionSkippedFileSegments, "Number of file segments skipped for eviction because of being unreleasable") \
    M(FilesystemCacheEvictionTries, "Number of filesystem cache eviction attempts") \
    M(FilesystemCacheLockKeyMicroseconds, "Lock cache key time") \
    M(FilesystemCacheLockMetadataMicroseconds, "Lock filesystem cache metadata time") \
    M(FilesystemCacheLockCacheMicroseconds, "Lock filesystem cache time") \

@ -485,8 +492,12 @@ The server successfully detected this situation and will download merged part fr
    M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
    M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely") \
    \
    M(SchemaInferenceCacheHits, "Number of times a schema from cache was used for schema inference") \
    M(SchemaInferenceCacheMisses, "Number of times a schema is not in cache while schema inference") \
    M(SchemaInferenceCacheHits, "Number of times the requested source is found in schema cache") \
    M(SchemaInferenceCacheSchemaHits, "Number of times the schema is found in schema cache during schema inference") \
    M(SchemaInferenceCacheNumRowsHits, "Number of times the number of rows is found in schema cache during count from files") \
    M(SchemaInferenceCacheMisses, "Number of times the requested source is not in schema cache") \
    M(SchemaInferenceCacheSchemaMisses, "Number of times the requested source is in cache but the schema is not in cache while schema inference") \
    M(SchemaInferenceCacheNumRowsMisses, "Number of times the requested source is in cache but the number of rows is not in cache while count from files") \
    M(SchemaInferenceCacheEvictions, "Number of times a schema from cache was evicted due to overflow") \
    M(SchemaInferenceCacheInvalidations, "Number of times a schema in cache became invalid due to changes in data") \
    \
51 src/Common/ProxyConfiguration.h Normal file
@ -0,0 +1,51 @@
#pragma once

#include <string>

namespace DB
{

struct ProxyConfiguration
{
    enum class Protocol
    {
        HTTP,
        HTTPS,
        ANY
    };

    static auto protocolFromString(const std::string & str)
    {
        if (str == "http")
        {
            return Protocol::HTTP;
        }
        else if (str == "https")
        {
            return Protocol::HTTPS;
        }
        else
        {
            return Protocol::ANY;
        }
    }

    static auto protocolToString(Protocol protocol)
    {
        switch (protocol)
        {
            case Protocol::HTTP:
                return "http";
            case Protocol::HTTPS:
                return "https";
            case Protocol::ANY:
                return "any";
        }
    }

    std::string host;
    Protocol protocol;
    uint16_t port;
};

}

17 src/Common/ProxyConfigurationResolver.h Normal file
@ -0,0 +1,17 @@
#pragma once

#include <Common/ProxyConfiguration.h>

namespace DB
{

struct ProxyConfigurationResolver
{
    using Protocol = ProxyConfiguration::Protocol;

    virtual ~ProxyConfigurationResolver() = default;
    virtual ProxyConfiguration resolve() = 0;
    virtual void errorReport(const ProxyConfiguration & config) = 0;
};

}

208 src/Common/ProxyConfigurationResolverProvider.cpp Normal file
@ -0,0 +1,208 @@
#include <Common/ProxyConfigurationResolverProvider.h>

#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/Exception.h>
#include <Common/ProxyListConfigurationResolver.h>
#include <Common/RemoteProxyConfigurationResolver.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>

#include <Interpreters/Context.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace
{
    std::shared_ptr<ProxyConfigurationResolver> getRemoteResolver(
        const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
    {
        auto endpoint = Poco::URI(configuration.getString(config_prefix + ".endpoint"));
        auto proxy_scheme = configuration.getString(config_prefix + ".proxy_scheme");
        if (proxy_scheme != "http" && proxy_scheme != "https")
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy resolver config: {}", proxy_scheme);
        auto proxy_port = configuration.getUInt(config_prefix + ".proxy_port");
        auto cache_ttl = configuration.getUInt(config_prefix + ".proxy_cache_time", 10);

        LOG_DEBUG(&Poco::Logger::get("ProxyConfigurationResolverProvider"), "Configured remote proxy resolver: {}, Scheme: {}, Port: {}",
            endpoint.toString(), proxy_scheme, proxy_port);

        return std::make_shared<RemoteProxyConfigurationResolver>(endpoint, proxy_scheme, proxy_port, cache_ttl);
    }

    std::shared_ptr<ProxyConfigurationResolver> getRemoteResolver(
        ProxyConfiguration::Protocol protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
    {
        std::vector<String> keys;
        configuration.keys(config_prefix, keys);

        std::vector<Poco::URI> uris;
        for (const auto & key : keys)
        {
            if (startsWith(key, "resolver"))
            {
                auto prefix_with_key = config_prefix + "." + key;
                auto proxy_scheme_config_string = prefix_with_key + ".proxy_scheme";
                auto config_protocol = configuration.getString(proxy_scheme_config_string);

                if (ProxyConfiguration::Protocol::ANY == protocol || config_protocol == ProxyConfiguration::protocolToString(protocol))
                {
                    return getRemoteResolver(prefix_with_key, configuration);
                }
            }
        }

        return nullptr;
    }

    auto extractURIList(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
    {
        std::vector<String> keys;
        configuration.keys(config_prefix, keys);

        std::vector<Poco::URI> uris;
        for (const auto & key : keys)
        {
            if (startsWith(key, "uri"))
            {
                Poco::URI proxy_uri(configuration.getString(config_prefix + "." + key));

                if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy uri: {}", proxy_uri.toString());
                if (proxy_uri.getHost().empty())
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty host in proxy uri: {}", proxy_uri.toString());

                uris.push_back(proxy_uri);

                LOG_DEBUG(&Poco::Logger::get("ProxyConfigurationResolverProvider"), "Configured proxy: {}", proxy_uri.toString());
            }
        }

        return uris;
    }

    std::shared_ptr<ProxyConfigurationResolver> getListResolverNewSyntax(
        ProxyConfiguration::Protocol protocol,
        const String & config_prefix,
        const Poco::Util::AbstractConfiguration & configuration
    )
    {
        std::vector<Poco::URI> uris;

        bool include_http_uris = ProxyConfiguration::Protocol::ANY == protocol || ProxyConfiguration::Protocol::HTTP == protocol;

        if (include_http_uris && configuration.has(config_prefix + ".http"))
        {
            auto http_uris = extractURIList(config_prefix + ".http", configuration);
            uris.insert(uris.end(), http_uris.begin(), http_uris.end());
        }

        bool include_https_uris = ProxyConfiguration::Protocol::ANY == protocol || ProxyConfiguration::Protocol::HTTPS == protocol;

        if (include_https_uris && configuration.has(config_prefix + ".https"))
        {
            auto https_uris = extractURIList(config_prefix + ".https", configuration);
            uris.insert(uris.end(), https_uris.begin(), https_uris.end());
        }

        return uris.empty() ? nullptr : std::make_shared<ProxyListConfigurationResolver>(uris);
    }

    std::shared_ptr<ProxyConfigurationResolver> getListResolverOldSyntax(
        const String & config_prefix,
        const Poco::Util::AbstractConfiguration & configuration
    )
    {
        auto uris = extractURIList(config_prefix, configuration);

        return uris.empty() ? nullptr : std::make_shared<ProxyListConfigurationResolver>(uris);
    }

    std::shared_ptr<ProxyConfigurationResolver> getListResolver(
        ProxyConfiguration::Protocol protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration
    )
    {
        std::vector<String> keys;
        configuration.keys(config_prefix, keys);

        bool new_setting_syntax = std::find_if(
            keys.begin(),
            keys.end(),
            [](const String & key)
            {
                return startsWith(key, "http") || startsWith(key, "https");
            }) != keys.end();

        return new_setting_syntax ? getListResolverNewSyntax(protocol, config_prefix, configuration)
            : getListResolverOldSyntax(config_prefix, configuration);
    }
}

std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::get(Protocol protocol)
{
    auto context = Context::getGlobalContextInstance();

    chassert(context);

    if (auto resolver = getFromSettings(protocol, "", context->getConfigRef()))
    {
        return resolver;
    }

    return std::make_shared<EnvironmentProxyConfigurationResolver>(protocol);
}

std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::getFromSettings(
    Protocol protocol,
    const String & config_prefix,
    const Poco::Util::AbstractConfiguration & configuration
)
{
    auto proxy_prefix = config_prefix.empty() ? "proxy" : config_prefix + ".proxy";

    if (configuration.has(proxy_prefix))
    {
        std::vector<String> config_keys;
        configuration.keys(proxy_prefix, config_keys);

        if (auto remote_resolver = getRemoteResolver(protocol, proxy_prefix, configuration))
        {
            return remote_resolver;
        }

        if (auto list_resolver = getListResolver(protocol, proxy_prefix, configuration))
        {
            return list_resolver;
        }
    }

    return nullptr;
}

std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
    const String & config_prefix,
    const Poco::Util::AbstractConfiguration & configuration
)
{
    /*
     * First try to get it from settings only using the combination of config_prefix and configuration.
     * This logic exists for backward compatibility with old S3 storage specific proxy configuration.
     * */
    if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(Protocol::ANY, config_prefix, configuration))
    {
        return resolver;
    }

    /*
     * In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings.
     * Falls back to Environment resolver if no configuration is found.
     * */
    return ProxyConfigurationResolverProvider::get(Protocol::ANY);
}

}

40 src/Common/ProxyConfigurationResolverProvider.h Normal file
@ -0,0 +1,40 @@
#pragma once

#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/ProxyConfigurationResolver.h>

namespace DB
{

class ProxyConfigurationResolverProvider
{
public:

    using Protocol = ProxyConfiguration::Protocol;

    /*
     * Returns appropriate ProxyConfigurationResolver based on current CH settings (Remote resolver or List resolver).
     * If no configuration is found, returns Environment Resolver.
     * */
    static std::shared_ptr<ProxyConfigurationResolver> get(Protocol protocol);

    /*
     * This API exists exclusively for backward compatibility with old S3 storage specific proxy configuration.
     * If no configuration is found, returns nullptr.
     * */
    static std::shared_ptr<ProxyConfigurationResolver> getFromOldSettingsFormat(
        const String & config_prefix,
        const Poco::Util::AbstractConfiguration & configuration
    );

private:
    static std::shared_ptr<ProxyConfigurationResolver> getFromSettings(
        Protocol protocol,
        const String & config_prefix,
        const Poco::Util::AbstractConfiguration & configuration
    );
};

}

31 src/Common/ProxyListConfigurationResolver.cpp Normal file
@ -0,0 +1,31 @@
#include <Common/ProxyListConfigurationResolver.h>

#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
#include <Poco/URI.h>

namespace DB
{

ProxyListConfigurationResolver::ProxyListConfigurationResolver(std::vector<Poco::URI> proxies_)
    : proxies(std::move(proxies_))
{
}

ProxyConfiguration ProxyListConfigurationResolver::resolve()
{
    if (proxies.empty())
    {
        return {};
    }

    /// Avoid atomic increment if number of proxies is 1.
    size_t index = proxies.size() > 1 ? (access_counter++) % proxies.size() : 0;

    auto & proxy = proxies[index];

    LOG_DEBUG(&Poco::Logger::get("ProxyListConfigurationResolver"), "Use proxy: {}", proxies[index].toString());
    return ProxyConfiguration {proxy.getHost(), ProxyConfiguration::protocolFromString(proxy.getScheme()), proxy.getPort()};
}

}

31 src/Common/ProxyListConfigurationResolver.h Normal file
@ -0,0 +1,31 @@
#pragma once

#include <base/types.h>

#include <Common/ProxyConfigurationResolver.h>
#include <Poco/URI.h>

namespace DB
{

/*
 * Round-robin proxy list resolver.
 * */
class ProxyListConfigurationResolver : public ProxyConfigurationResolver
{
public:
    explicit ProxyListConfigurationResolver(std::vector<Poco::URI> proxies_);

    ProxyConfiguration resolve() override;

    void errorReport(const ProxyConfiguration &) override {}

private:
    std::vector<Poco::URI> proxies;

    /// Access counter to get proxy using round-robin strategy.
    std::atomic<size_t> access_counter;

};

}

@ -1,32 +1,36 @@
#include "ProxyResolverConfiguration.h"

#if USE_AWS_S3
#include <Common/RemoteProxyConfigurationResolver.h>

#include <utility>
#include <IO/HTTPCommon.h>
#include "Poco/StreamCopier.h"
#include <Poco/StreamCopier.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Common/logger_useful.h>
#include <Common/DNSResolver.h>

namespace DB::ErrorCodes
namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace DB::S3
{

ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
    , unsigned proxy_port_, unsigned cache_ttl_)
    : endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
    const Poco::URI & endpoint_,
    String proxy_protocol_,
    unsigned proxy_port_,
    unsigned cache_ttl_
)
    : endpoint(endpoint_), proxy_protocol(std::move(proxy_protocol_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
{
}

ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const Aws::Http::HttpRequest &)
ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
{
    LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());
    auto * logger = &Poco::Logger::get("RemoteProxyConfigurationResolver");

    LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString());

    std::lock_guard lock(cache_mutex);

@ -34,7 +38,12 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const

    if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
    {
        LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxy_scheme), cached_config.proxy_host, cached_config.proxy_port);
        LOG_DEBUG(logger,
            "Use cached proxy: {}://{}:{}",
            cached_config.protocol,
            cached_config.host,
            cached_config.port
        );
        return cached_config;
    }

@ -84,11 +93,11 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const
        /// Read proxy host as string from response body.
        Poco::StreamCopier::copyToString(response_body_stream, proxy_host);

        LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);
        LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol, proxy_host, proxy_port);

        cached_config.proxy_scheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
        cached_config.proxy_host = proxy_host;
        cached_config.proxy_port = proxy_port;
        cached_config.protocol = ProxyConfiguration::protocolFromString(proxy_protocol);
        cached_config.host = proxy_host;
        cached_config.port = proxy_port;
        cache_timestamp = std::chrono::system_clock::now();
        cache_valid = true;

@ -96,16 +105,14 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const
    }
    catch (...)
    {
        tryLogCurrentException("AWSClient", "Failed to obtain proxy");
        /// Don't use proxy if it can't be obtained.
        ClientConfigurationPerRequest cfg;
        return cfg;
        tryLogCurrentException("RemoteProxyConfigurationResolver", "Failed to obtain proxy");
        return {};
    }
}

void ProxyResolverConfiguration::errorReport(const ClientConfigurationPerRequest & config)
void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & config)
{
    if (config.proxy_host.empty())
    if (config.host.empty())
        return;

    std::lock_guard lock(cache_mutex);

@ -113,8 +120,8 @@ void ProxyResolverConfiguration::errorReport(const ClientConfigurationPerRequest
    if (!cache_ttl.count() || !cache_valid)
        return;

    if (std::tie(cached_config.proxy_scheme, cached_config.proxy_host, cached_config.proxy_port)
        != std::tie(config.proxy_scheme, config.proxy_host, config.proxy_port))
    if (std::tie(cached_config.protocol, cached_config.host, cached_config.port)
        != std::tie(config.protocol, config.host, config.port))
        return;

    /// Invalidate cached proxy when got error with this proxy

@ -122,5 +129,3 @@ void ProxyResolverConfiguration::errorReport(const ClientConfigurationPerRequest
}

}

#endif
46
src/Common/RemoteProxyConfigurationResolver.h
Normal file
46
src/Common/RemoteProxyConfigurationResolver.h
Normal file
@ -0,0 +1,46 @@
#pragma once

#include <base/types.h>

#include <mutex>

#include <Common/ProxyConfigurationResolver.h>
#include <Poco/URI.h>

namespace DB
{

/*
* Makes an HTTP GET request to the specified endpoint to obtain a proxy host.
* */
class RemoteProxyConfigurationResolver : public ProxyConfigurationResolver
{
public:
RemoteProxyConfigurationResolver(
const Poco::URI & endpoint_,
String proxy_protocol_,
unsigned proxy_port_,
unsigned cache_ttl_
);

ProxyConfiguration resolve() override;

void errorReport(const ProxyConfiguration & config) override;

private:

/// Endpoint to obtain a proxy host.
const Poco::URI endpoint;
/// Scheme for obtained proxy.
const String proxy_protocol;
/// Port for obtained proxy.
const unsigned proxy_port;

std::mutex cache_mutex;
bool cache_valid = false;
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
const std::chrono::seconds cache_ttl{0};
ProxyConfiguration cached_config;
};

}
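Note: the resolver above caches the endpoint's answer for cache_ttl seconds and only serves the cached entry while it is fresh. A minimal standalone sketch of that freshness check (simplified types; not the actual ClickHouse code):

// Sketch of the TTL check used by RemoteProxyConfigurationResolver::resolve().
#include <chrono>

struct CachedProxy
{
    bool valid = false;
    std::chrono::system_clock::time_point stamp;
};

static bool cacheIsFresh(const CachedProxy & cache, std::chrono::seconds ttl)
{
    const auto now = std::chrono::system_clock::now();
    /// Fresh only if the TTL is non-zero, the entry was filled, and `now`
    /// falls inside [stamp, stamp + ttl] (the lower bound guards against clock jumps).
    return ttl.count() && cache.valid && now >= cache.stamp && now <= cache.stamp + ttl;
}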
@ -31,45 +31,45 @@ public:
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio_, OnWeightLossFunction on_weight_loss_function_)
: Base(std::make_unique<NoCachePolicyUserQuota>())
, size_ratio(size_ratio_)
, max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
, max_size_in_bytes(max_size_in_bytes_)
, max_protected_size(calculateMaxProtectedSize(max_size_in_bytes_, size_ratio_))
, max_count(max_count_)
, size_ratio(size_ratio_)
, on_weight_loss_function(on_weight_loss_function_)
{
}

size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t sizeInBytes() const override
{
return current_size_in_bytes;
}

size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t count() const override
{
return cells.size();
}

size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t maxSizeInBytes() const override
{
return max_size_in_bytes;
}

void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_) override
{
max_count = max_count_;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
}

void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxSizeInBytes(size_t max_size_in_bytes_) override
{
max_protected_size = static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio));
max_protected_size = calculateMaxProtectedSize(max_size_in_bytes_, size_ratio);
max_size_in_bytes = max_size_in_bytes_;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
}

void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
void clear() override
{
cells.clear();
probationary_queue.clear();
@ -78,7 +78,7 @@ public:
current_protected_size = 0;
}

void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
void remove(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -95,7 +95,7 @@ public:
cells.erase(it);
}

MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
MappedPtr get(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -116,7 +116,7 @@ public:
return cell.value;
}

std::optional<KeyMapped> getWithKey(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) override
std::optional<KeyMapped> getWithKey(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -137,7 +137,7 @@ public:
return std::make_optional<KeyMapped>({it->first, cell.value});
}

void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
void set(const Key & key, const MappedPtr & mapped) override
{
auto [it, inserted] = cells.emplace(std::piecewise_construct,
std::forward_as_tuple(key),
@ -208,16 +208,21 @@ private:

Cells cells;

size_t max_size_in_bytes;
size_t max_protected_size;
size_t max_count;
const double size_ratio;
size_t current_protected_size = 0;
size_t current_size_in_bytes = 0;
size_t max_protected_size;
size_t max_size_in_bytes;
size_t max_count;

WeightFunction weight_function;
OnWeightLossFunction on_weight_loss_function;

static size_t calculateMaxProtectedSize(size_t max_size_in_bytes, double size_ratio)
{
return static_cast<size_t>(max_size_in_bytes * std::max(0.0, std::min(1.0, size_ratio)));
}

void removeOverflow(SLRUQueue & queue, size_t max_weight_size, size_t & current_weight_size, bool is_protected)
{
size_t current_weight_lost = 0;
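Note: calculateMaxProtectedSize() now clamps size_ratio into [0.0, 1.0] before scaling, where the old inline expression only capped it at 1.0. A standalone check of the arithmetic (illustrative only):

// Standalone illustration of calculateMaxProtectedSize() from the SLRU diff above.
#include <algorithm>
#include <cassert>
#include <cstddef>

static size_t calculateMaxProtectedSize(size_t max_size_in_bytes, double size_ratio)
{
    /// Clamp the ratio into [0.0, 1.0] so a bad setting cannot make the
    /// protected queue larger than the whole cache (or negative).
    return static_cast<size_t>(max_size_in_bytes * std::max(0.0, std::min(1.0, size_ratio)));
}

int main()
{
    assert(calculateMaxProtectedSize(100, 0.5) == 50);  /// normal case
    assert(calculateMaxProtectedSize(100, 2.0) == 100); /// clamped to the full size
    assert(calculateMaxProtectedSize(100, -1.0) == 0);  /// clamped to zero
}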
@ -94,39 +94,39 @@ public:
{
}

size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t sizeInBytes() const override
{
return size_in_bytes;
}

size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t count() const override
{
return cache.size();
}

size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t maxSizeInBytes() const override
{
return max_size_in_bytes;
}

void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_) override
{
/// lazy behavior: the cache only shrinks upon the next insert
max_count = max_count_;
}

void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxSizeInBytes(size_t max_size_in_bytes_) override
{
/// lazy behavior: the cache only shrinks upon the next insert
max_size_in_bytes = max_size_in_bytes_;
}

void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
void clear() override
{
cache.clear();
}

void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
void remove(const Key & key) override
{
auto it = cache.find(key);
if (it == cache.end())
@ -137,7 +137,7 @@ public:
size_in_bytes -= sz;
}

MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
MappedPtr get(const Key & key) override
{
auto it = cache.find(key);
if (it == cache.end())
@ -145,7 +145,7 @@ public:
return it->second;
}

std::optional<KeyMapped> getWithKey(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
std::optional<KeyMapped> getWithKey(const Key & key) override
{
auto it = cache.find(key);
if (it == cache.end())
@ -154,7 +154,7 @@ public:
}

/// Evicts on a best-effort basis. If there are too many non-stale entries, the new entry may not be cached at all!
void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
void set(const Key & key, const MappedPtr & mapped) override
{
chassert(mapped.get());

@ -863,7 +863,9 @@ void TestKeeper::reconfig(
.callback = [callback](const Response & response)
{
callback(dynamic_cast<const ReconfigResponse &>(response));
}
},
.watch = nullptr,
.time = {}
});
}

@ -18,7 +18,6 @@
#include <Common/DNSResolver.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>

#include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
@ -7,6 +7,7 @@
#include <memory>
#include <mutex>
#include <string>
#include <Common/logger_useful.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <Common/Stopwatch.h>
@ -668,7 +669,13 @@
return;
try
{
zookeeper.tryRemove(path);
if (!zookeeper.expired())
zookeeper.tryRemove(path);
else
{
ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode);
LOG_DEBUG(&Poco::Logger::get("EphemeralNodeHolder"), "Cannot remove {} since session has been expired", path);
}
}
catch (...)
{
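Note: the destructor change above skips tryRemove() once the session has expired, because the server drops the ephemeral nodes of an expired session on its own. A reduced sketch of the guard (simplified, hypothetical types; not the actual ZooKeeper client):

// Sketch of the expired-session guard added to EphemeralNodeHolder above.
struct FakeSession
{
    bool expired() const { return is_expired; }
    void tryRemove(const char * /*path*/) { /* issue the delete request */ }
    bool is_expired = false;
};

static void releaseEphemeralNode(FakeSession & session, const char * path)
{
    if (!session.expired())
        session.tryRemove(path);
    /// else: the session is gone, so the server has already removed the
    /// ephemeral node; issuing tryRemove() would only fail and hide the cause.
}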
@ -49,7 +49,7 @@ struct AsyncLoaderTest
}

explicit AsyncLoaderTest(size_t max_threads = 1)
: AsyncLoaderTest({{.max_threads = max_threads}})
: AsyncLoaderTest({{.max_threads = max_threads, .priority = {}}})
{}

std::vector<AsyncLoader::PoolInitializer> getPoolInitializers(std::vector<Initializer> initializers)
@ -73,3 +73,25 @@ inline std::string xmlNodeAsString(Poco::XML::Node *pNode)
result += ("</"+ node_name + ">\n");
return Poco::XML::fromXMLString(result);
}

struct EnvironmentProxySetter
{
EnvironmentProxySetter(const Poco::URI & http_proxy, const Poco::URI & https_proxy)
{
if (!http_proxy.empty())
{
setenv("http_proxy", http_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
}

if (!https_proxy.empty())
{
setenv("https_proxy", https_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
}
}

~EnvironmentProxySetter()
{
unsetenv("http_proxy"); // NOLINT(concurrency-mt-unsafe)
unsetenv("https_proxy"); // NOLINT(concurrency-mt-unsafe)
}
};
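Note: EnvironmentProxySetter above is an RAII guard: the constructor exports http_proxy/https_proxy for the duration of a test scope and the destructor unconditionally unsets both. A possible usage, mirroring the tests added later in this commit:

// Possible usage of the RAII helper above (mirrors the added unit tests).
{
    EnvironmentProxySetter setter(Poco::URI("http://proxy_server:3128"), {});
    /// Inside this scope getenv("http_proxy") returns the URI above,
    /// so environment-based proxy resolution can be exercised.
} /// destructor runs here: both variables are unset again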
@ -5,11 +5,11 @@
TEST(LRUCache, set)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(2, std::make_shared<int>(3));

auto w = lru_cache.weight();
auto w = lru_cache.sizeInBytes();
auto n = lru_cache.count();
ASSERT_EQ(w, 2);
ASSERT_EQ(n, 2);
@ -18,7 +18,7 @@ TEST(LRUCache, set)
TEST(LRUCache, update)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(1, std::make_shared<int>(3));
auto val = lru_cache.get(1);
@ -29,7 +29,7 @@ TEST(LRUCache, update)
TEST(LRUCache, get)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(2, std::make_shared<int>(3));
SimpleCacheBase::MappedPtr value = lru_cache.get(1);
@ -49,7 +49,7 @@ struct ValueWeight
TEST(LRUCache, evictOnSize)
{
using SimpleCacheBase = DB::CacheBase<int, size_t>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 20, /*max_count*/ 3);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 20, /*max_count*/ 3, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<size_t>(2));
lru_cache.set(2, std::make_shared<size_t>(3));
lru_cache.set(3, std::make_shared<size_t>(4));
@ -65,7 +65,7 @@ TEST(LRUCache, evictOnSize)
TEST(LRUCache, evictOnWeight)
{
using SimpleCacheBase = DB::CacheBase<int, size_t, std::hash<int>, ValueWeight>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<size_t>(2));
lru_cache.set(2, std::make_shared<size_t>(3));
lru_cache.set(3, std::make_shared<size_t>(4));
@ -74,7 +74,7 @@ TEST(LRUCache, evictOnWeight)
auto n = lru_cache.count();
ASSERT_EQ(n, 2);

auto w = lru_cache.weight();
auto w = lru_cache.sizeInBytes();
ASSERT_EQ(w, 9);

auto value = lru_cache.get(1);
@ -86,7 +86,7 @@ TEST(LRUCache, evictOnWeight)
TEST(LRUCache, getOrSet)
{
using SimpleCacheBase = DB::CacheBase<int, size_t, std::hash<int>, ValueWeight>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
size_t x = 10;
auto load_func = [&] { return std::make_shared<size_t>(x); };
auto [value, loaded] = lru_cache.getOrSet(1, load_func);
122
src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp
Normal file
122
src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp
Normal file
@ -0,0 +1,122 @@
#include <gtest/gtest.h>

#include <Common/ProxyConfigurationResolverProvider.h>
#include <Common/tests/gtest_global_context.h>
#include <Common/tests/gtest_helper_functions.h>

using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;

class ProxyConfigurationResolverProviderTests : public ::testing::Test
{
protected:

static void SetUpTestSuite() {
context = getContext().context;
}

static void TearDownTestSuite() {
context->setConfig(Poco::AutoPtr(new Poco::Util::MapConfiguration()));
}

static DB::ContextMutablePtr context;
};

DB::ContextMutablePtr ProxyConfigurationResolverProviderTests::context;

Poco::URI http_env_proxy_server = Poco::URI("http://http_environment_proxy:3128");
Poco::URI https_env_proxy_server = Poco::URI("http://https_environment_proxy:3128");

Poco::URI http_list_proxy_server = Poco::URI("http://http_list_proxy:3128");
Poco::URI https_list_proxy_server = Poco::URI("http://https_list_proxy:3128");

TEST_F(ProxyConfigurationResolverProviderTests, EnvironmentResolverShouldBeUsedIfNoSettings)
{
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);

auto http_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();
auto https_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();

ASSERT_EQ(http_configuration.host, http_env_proxy_server.getHost());
ASSERT_EQ(http_configuration.port, http_env_proxy_server.getPort());
ASSERT_EQ(http_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));

ASSERT_EQ(https_configuration.host, https_env_proxy_server.getHost());
ASSERT_EQ(https_configuration.port, https_env_proxy_server.getPort());
ASSERT_EQ(https_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
}

TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());

config->setString("proxy", "");
config->setString("proxy.http", "");
config->setString("proxy.http.uri", http_list_proxy_server.toString());
context->setConfig(config);

auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();

ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));

auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();

// No https configuration since it's not set
ASSERT_EQ(https_proxy_configuration.host, "");
ASSERT_EQ(https_proxy_configuration.port, 0);
}

TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());

config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.uri", https_list_proxy_server.toString());
context->setConfig(config);

auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();

ASSERT_EQ(http_proxy_configuration.host, "");
ASSERT_EQ(http_proxy_configuration.port, 0);

auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();

ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());

// still HTTP because the proxy host is not HTTPS
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
}

TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());

config->setString("proxy", "");
config->setString("proxy.http", "");
config->setString("proxy.http.uri", http_list_proxy_server.toString());

config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.uri", https_list_proxy_server.toString());

context->setConfig(config);

auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();

ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());

auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();

ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());

// still HTTP because the proxy host is not HTTPS
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
}

// remote resolver is tricky to be tested in unit tests
96
src/Common/tests/gtest_proxy_environment_configuration.cpp
Normal file
96
src/Common/tests/gtest_proxy_environment_configuration.cpp
Normal file
@ -0,0 +1,96 @@
#include <gtest/gtest.h>

#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/tests/gtest_helper_functions.h>
#include <Poco/URI.h>

namespace
{
auto http_proxy_server = Poco::URI("http://proxy_server:3128");
auto https_proxy_server = Poco::URI("https://proxy_server:3128");
}

TEST(EnvironmentProxyConfigurationResolver, TestHTTP)
{
EnvironmentProxySetter setter(http_proxy_server, {});

DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTP);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
}

TEST(EnvironmentProxyConfigurationResolver, TestHTTPNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTP);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}

TEST(EnvironmentProxyConfigurationResolver, TestHTTPs)
{
EnvironmentProxySetter setter({}, https_proxy_server);

DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTPS);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, https_proxy_server.getHost());
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
}

TEST(EnvironmentProxyConfigurationResolver, TestHTTPsNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTPS);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}

TEST(EnvironmentProxyConfigurationResolver, TestANYHTTP)
{
EnvironmentProxySetter setter(http_proxy_server, {});

DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
}

TEST(EnvironmentProxyConfigurationResolver, TestANYHTTPS)
{
EnvironmentProxySetter setter({}, https_proxy_server);

DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, https_proxy_server.getHost());
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
}

TEST(EnvironmentProxyConfigurationResolver, TestANYNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);

auto configuration = resolver.resolve();

ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}
26
src/Common/tests/gtest_proxy_list_configuration_resolver.cpp
Normal file
26
src/Common/tests/gtest_proxy_list_configuration_resolver.cpp
Normal file
@ -0,0 +1,26 @@
#include <gtest/gtest.h>

#include <Common/ProxyListConfigurationResolver.h>
#include <Poco/URI.h>

namespace
{
auto proxy_server1 = Poco::URI("http://proxy_server1:3128");
auto proxy_server2 = Poco::URI("http://proxy_server2:3128");
}

TEST(ProxyListConfigurationResolver, SimpleTest)
{
DB::ProxyListConfigurationResolver resolver({proxy_server1, proxy_server2});

auto configuration1 = resolver.resolve();
auto configuration2 = resolver.resolve();

ASSERT_EQ(configuration1.host, proxy_server1.getHost());
ASSERT_EQ(configuration1.port, proxy_server1.getPort());
ASSERT_EQ(configuration1.protocol, DB::ProxyConfiguration::protocolFromString(proxy_server1.getScheme()));

ASSERT_EQ(configuration2.host, proxy_server2.getHost());
ASSERT_EQ(configuration2.port, proxy_server2.getPort());
ASSERT_EQ(configuration2.protocol, DB::ProxyConfiguration::protocolFromString(proxy_server2.getScheme()));
}
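Note: the two consecutive resolve() calls assert round-robin order over the configured list. The index arithmetic (visible in the deleted S3 variant at the end of this diff) reduces to the following, shown standalone:

// Round-robin index selection, as in ProxyListConfigurationResolver:
// avoid the atomic increment when only one proxy is configured.
#include <atomic>
#include <cstddef>

static size_t pickProxyIndex(std::atomic<size_t> & access_counter, size_t proxy_count)
{
    return proxy_count > 1 ? (access_counter++) % proxy_count : 0;
}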
@ -9,7 +9,7 @@ TEST(SLRUCache, set)
slru_cache.set(1, std::make_shared<int>(2));
slru_cache.set(2, std::make_shared<int>(3));

auto w = slru_cache.weight();
auto w = slru_cache.sizeInBytes();
auto n = slru_cache.count();
ASSERT_EQ(w, 2);
ASSERT_EQ(n, 2);
@ -125,7 +125,7 @@ TEST(SLRUCache, evictOnElements)
auto n = slru_cache.count();
ASSERT_EQ(n, 1);

auto w = slru_cache.weight();
auto w = slru_cache.sizeInBytes();
ASSERT_EQ(w, 3);

auto value = slru_cache.get(1);
@ -148,7 +148,7 @@ TEST(SLRUCache, evictOnWeight)
auto n = slru_cache.count();
ASSERT_EQ(n, 2);

auto w = slru_cache.weight();
auto w = slru_cache.sizeInBytes();
ASSERT_EQ(w, 9);

auto value = slru_cache.get(1);
@ -23,7 +23,7 @@ int main(int argc, char ** argv)

try
{
UncompressedCache cache(1024);
UncompressedCache cache("SLRU", 1024, 0.5);
std::string path = argv[1];

std::cerr << std::fixed << std::setprecision(3);
@ -149,6 +149,9 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
write_bool(coordination_settings->compress_snapshots_with_zstd_format);
writeText("configuration_change_tries_count=", buf);
write_int(coordination_settings->configuration_change_tries_count);

writeText("raft_limits_reconnect_limit=", buf);
write_int(static_cast<uint64_t>(coordination_settings->raft_limits_reconnect_limit));
}

KeeperConfigurationAndSettingsPtr
@ -48,7 +48,8 @@ struct Settings;
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0)
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0)

DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
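Note: per the raft_limits_reconnect_limit description above, a peer connection is re-established once it stays silent longer than the limit multiplied by the heartbeat interval. A rough, illustrative reading of that threshold (names hypothetical, not NuRaft's actual API):

// Illustrative reading of the raft_limits_reconnect_limit semantics.
#include <cstdint>

static bool shouldReestablishConnection(uint64_t silent_ms, uint64_t heartbeat_interval_ms, uint64_t reconnect_limit)
{
    /// With the default limit of 50 and e.g. a 500 ms heartbeat, the
    /// connection would be re-established after ~25 seconds of silence.
    return silent_ms > reconnect_limit * heartbeat_interval_ms;
}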
@ -480,6 +480,7 @@ void KeeperDispatcher::shutdown()
.session_id = session,
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
.request = std::move(request),
.digest = std::nullopt
};

close_requests.push_back(std::move(request_info));
@ -576,6 +577,7 @@ void KeeperDispatcher::sessionCleanerTask()
.session_id = dead_session,
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
.request = std::move(request),
.digest = std::nullopt
};
if (!requests_queue->push(std::move(request_info)))
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");
@ -372,6 +372,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co

state_manager->getLogStore()->setRaftServer(raft_instance);

nuraft::raft_server::limits raft_limits;
raft_limits.reconnect_limit_ = getValueOrMaxInt32AndLogWarning(coordination_settings->raft_limits_reconnect_limit, "raft_limits_reconnect_limit", log);
raft_instance->set_raft_limits(raft_limits);

raft_instance->start_server(init_options.skip_initial_election_timeout_);

nuraft::ptr<nuraft::raft_server> casted_raft_server = raft_instance;
@ -92,7 +92,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
auth_settings.region,
RemoteHostFilter(), s3_max_redirects,
enable_s3_requests_logging,
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {});
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {},
new_uri.uri.getScheme());

client_configuration.endpointOverride = new_uri.endpoint;

@ -2127,7 +2127,7 @@ void KeeperStorage::preprocessRequest(
}

std::vector<Delta> new_deltas;
TransactionInfo transaction{.zxid = new_last_zxid};
TransactionInfo transaction{.zxid = new_last_zxid, .nodes_digest = {}};
uint64_t new_digest = getNodesDigest(false).value;
SCOPE_EXIT({
if (keeper_context->digestEnabled())
@ -66,12 +66,18 @@
#define DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH 1000

/// Default maximum (total and entry) sizes and policies of various caches
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB;
static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice
static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB;
static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES = 10'000;
@ -60,10 +60,16 @@ namespace DB
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Index uncompressed cache policy name.", 0) \
M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \
M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the index uncompressed cache relative to the cache's total size.", 0) \
M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Index mark cache policy name.", 0) \
M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for index marks. Zero means disabled.", 0) \
M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the index mark cache relative to the cache's total size.", 0) \
M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
\
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
@ -535,6 +535,8 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_count_from_files, true, "Optimize counting rows from files in supported input formats", 0) \
M(Bool, use_cache_for_count_from_files, true, "Use cache to count the number of rows in files", 0) \
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \
@ -263,7 +263,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const
{
Strings paths;
const auto & addresses_with_failover = cluster->getShardsAddresses();
const auto & addresses_with_failover = cluster_->getShardsAddresses();
const auto & shards_info = cluster_->getShardsInfo();
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
{
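Note: the one-character fix above (cluster -> cluster_) repairs a classic shadowing bug: the function read shard addresses from the member cluster while iterating the shards of the cluster_ argument. A reduced, hypothetical illustration of the hazard:

// Reduced illustration of the member-vs-parameter mixup fixed above (hypothetical types).
#include <cassert>

struct Database
{
    int cluster = 1; /// member with a confusingly similar name

    int shardCount(int cluster_) const
    {
        /// BUG variant: `return cluster;` silently reads the member instead.
        return cluster_; /// correct: use the argument that was passed in
    }
};

int main()
{
    Database db;
    assert(db.shardCount(5) == 5);
}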
@ -35,6 +35,7 @@ namespace ErrorCodes
extern const int UNKNOWN_TABLE;
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_GET_CREATE_TABLE_QUERY;
}

DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
@ -221,10 +222,25 @@ ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & ta

std::lock_guard lock(handler_mutex);

/// FIXME TSA
auto storage = std::make_shared<StorageMaterializedPostgreSQL>(StorageID(TSA_SUPPRESS_WARNING_FOR_READ(database_name), table_name), getContext(), remote_database_name, table_name);
auto ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name);
assert_cast<ASTCreateQuery *>(ast_storage.get())->uuid = UUIDHelpers::generateV4();
ASTPtr ast_storage;
try
{
auto storage = std::make_shared<StorageMaterializedPostgreSQL>(StorageID(TSA_SUPPRESS_WARNING_FOR_READ(database_name), table_name), getContext(), remote_database_name, table_name);
ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name);
assert_cast<ASTCreateQuery *>(ast_storage.get())->uuid = UUIDHelpers::generateV4();
}
catch (...)
{
if (throw_on_error)
{
throw Exception(ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY,
"Received error while fetching table structure for table {} from PostgreSQL: {}",
backQuote(table_name), getCurrentExceptionMessage(true));
}

tryLogCurrentException(__PRETTY_FUNCTION__);
}

return ast_storage;
}

@ -228,7 +228,7 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
else
{
Field field = parseStringToField(value, attr.type);
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .original_value = value};
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .pieces = {}, .original_value = value};
}
}
}
@ -114,6 +114,26 @@ void AsynchronousBoundedReadBuffer::setReadUntilPosition(size_t position)
{
if (!read_until_position || position != *read_until_position)
{
if (position < file_offset_of_buffer_end)
{
/// file has been read beyond new read until position already
if (working_buffer.size() >= file_offset_of_buffer_end - position)
{
/// new read until position is inside working buffer
file_offset_of_buffer_end = position;
}
else
{
/// new read until position is before working buffer begin
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to set read until position before already read data ({} > {}, info: {})",
position,
getPosition(),
impl->getInfoForLog());
}
}

read_until_position = position;

/// We must wait on future and reset the prefetch here, because otherwise there might be
@ -248,7 +268,6 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence)
{
/// Position is still inside the buffer.
/// Probably it is at the end of the buffer - then we will load data on the following 'next' call.

pos = working_buffer.end() - file_offset_of_buffer_end + new_pos;
assert(pos >= working_buffer.begin());
assert(pos <= working_buffer.end());
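Note: the new branch in setReadUntilPosition() accepts a smaller read-until position only while it still lies inside the working buffer; otherwise data was already consumed past it and the call must fail. The boundary test, extracted as a standalone sketch (the real code throws LOGICAL_ERROR in the second case):

// The boundary check added to setReadUntilPosition(), extracted for clarity.
// Applies only when position < file_offset_of_buffer_end.
#include <cstddef>

enum class Action { ShrinkBufferEnd, Reject };

static Action classifyNewReadUntil(size_t position, size_t file_offset_of_buffer_end, size_t working_buffer_size)
{
    /// We already read past `position`. That is fine only if the new bound
    /// still falls inside the bytes currently held in the working buffer.
    if (working_buffer_size >= file_offset_of_buffer_end - position)
        return Action::ShrinkBufferEnd; /// file_offset_of_buffer_end = position
    return Action::Reject;              /// would discard bytes already consumed
}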
@ -46,6 +46,8 @@ public:

void setReadUntilEnd() override { return setReadUntilPosition(getFileSize()); }

size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; }

off_t getPosition() override { return file_offset_of_buffer_end - available() + bytes_to_ignore; }

private:
@ -147,11 +147,19 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
}

CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) const
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment)
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::CachedReadBufferCreateBufferMicroseconds);

auto path = file_segment.getPathInLocalCache();
if (cache_file_reader)
{
chassert(cache_file_reader->getFileName() == path);
if (cache_file_reader->getFileName() == path)
return cache_file_reader;

cache_file_reader.reset();
}

ReadSettings local_read_settings{settings};
/// Do not allow to use asynchronous version of LocalFSReadMethod.
@ -160,12 +168,12 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegm
if (use_external_buffer)
local_read_settings.local_fs_buffer_size = 0;

auto buf = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead());
cache_file_reader = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead());

if (getFileSizeFromReadBuffer(*buf) == 0)
if (getFileSizeFromReadBuffer(*cache_file_reader) == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path);

return buf;
return cache_file_reader;
}

CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
@ -207,7 +215,7 @@ CachedOnDiskReadBufferFromFile::getRemoteReadBuffer(FileSegment & file_segment,
}
else
{
chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset());
}

return remote_fs_segment_reader;
@ -240,12 +248,12 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
/// segment{k} state: DOWNLOADING
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset (in progress)
/// current_write_offset (in progress)
/// requested_range: [__________]
/// ^
/// current_offset
size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset(true);
return first_non_downloaded_offset > current_offset;
size_t current_write_offset = file_segment.getCurrentWriteOffset();
return current_write_offset > current_offset;
}

CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
@ -285,7 +293,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
/// segment{k} state: DOWNLOADING
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset (in progress)
/// current_write_offset (in progress)
/// requested_range: [__________]
/// ^
/// file_offset_of_buffer_end
@ -310,7 +318,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
/// segment{k} state: PARTIALLY_DOWNLOADED
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset (in progress)
/// current_write_offset (in progress)
/// requested_range: [__________]
/// ^
/// file_offset_of_buffer_end
@ -327,7 +335,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
/// segment{k}
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset
/// current_write_offset
/// requested_range: [__________]
/// ^
/// file_offset_of_buffer_end
@ -337,7 +345,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
return getCacheReadBuffer(file_segment);
}

auto current_write_offset = file_segment.getCurrentWriteOffset(false);
auto current_write_offset = file_segment.getCurrentWriteOffset();
if (current_write_offset < file_offset_of_buffer_end)
{
/// segment{1}
@ -459,7 +467,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme

if (bytes_to_predownload)
{
const size_t current_write_offset = file_segment.getCurrentWriteOffset(false);
const size_t current_write_offset = file_segment.getCurrentWriteOffset();
read_buffer_for_file_segment->seek(current_write_offset, SEEK_SET);
}
else
@ -469,7 +477,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegme
chassert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end);
}

const auto current_write_offset = file_segment.getCurrentWriteOffset(false);
const auto current_write_offset = file_segment.getCurrentWriteOffset();
if (current_write_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
{
throw Exception(
@ -501,6 +509,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
appendFilesystemCacheLog(*current_file_segment, read_type);

chassert(file_offset_of_buffer_end > completed_range.right);
cache_file_reader.reset();

file_segments->popFront();
if (file_segments->empty())
@ -547,9 +556,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
/// download from offset a'' < a', but return buffer from offset a'.
LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId());

/// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
chassert(static_cast<size_t>(implementation_buffer->getPosition()) == file_segment.getCurrentWriteOffset(false));
size_t current_offset = file_segment.getCurrentWriteOffset(false);
/// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset());
size_t current_offset = file_segment.getCurrentWriteOffset();
chassert(static_cast<size_t>(implementation_buffer->getPosition()) == current_offset);
const auto & current_range = file_segment.range();

while (true)
@ -575,7 +584,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
"current download offset: {}, expected: {}, eof: {}",
bytes_to_predownload,
current_range.toString(),
file_segment.getCurrentWriteOffset(false),
file_segment.getCurrentWriteOffset(),
file_offset_of_buffer_end,
implementation_buffer->eof());

@ -585,7 +594,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
{
nextimpl_working_buffer_offset = implementation_buffer->offset();

auto current_write_offset = file_segment.getCurrentWriteOffset(false);
auto current_write_offset = file_segment.getCurrentWriteOffset();
if (current_write_offset != static_cast<size_t>(implementation_buffer->getPosition())
|| current_write_offset != file_offset_of_buffer_end)
{
@ -614,7 +623,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);

chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));
chassert(file_segment.getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));

continue_predownload = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, file_segment);
if (continue_predownload)
@ -695,38 +704,19 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
{
/// If current read_type is ReadType::CACHED and file segment is not DOWNLOADED,
/// it means the following case, e.g. we started from CacheReadBuffer and continue with RemoteFSReadBuffer.
/// segment{k}
/// cache: [______|___________
/// segment{k}
/// cache: [______|___________]
/// ^
/// current_write_offset
/// requested_range: [__________]
/// requested_range: [__________
/// ^
/// file_offset_of_buffer_end

auto current_write_offset = file_segment.getCurrentWriteOffset(true);
bool cached_part_is_finished = current_write_offset == file_offset_of_buffer_end;

LOG_TEST(log, "Current write offset: {}, file offset of buffer end: {}", current_write_offset, file_offset_of_buffer_end);

if (cached_part_is_finished)
if (file_offset_of_buffer_end >= file_segment.getCurrentWriteOffset())
{
/// TODO: makes sense to reuse local file reader if we return here with CACHED read type again?
implementation_buffer = getImplementationBuffer(file_segment);

return true;
}
else if (current_write_offset < file_offset_of_buffer_end)
{
const auto path = file_segment.getPathInLocalCache();
size_t file_size = 0;
if (fs::exists(path))
file_size = fs::file_size(path);

throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Invariant failed. Expected {} >= {} (size on fs: {}, {})",
current_write_offset, file_offset_of_buffer_end, file_size, getInfoForLog());
}
}
else if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE)
{
@ -950,23 +940,6 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size);
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed);

if (result)
{
const size_t new_file_offset = file_offset_of_buffer_end + size;
const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true);
if (new_file_offset > file_segment.range().right + 1 || new_file_offset > file_segment_write_offset)
{
auto file_segment_path = file_segment.getPathInLocalCache();
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Read unexpected size. "
"File size: {}, file segment path: {}, impl size: {}, impl path: {}"
"file segment info: {}",
fs::file_size(file_segment_path), file_segment_path,
implementation_buffer->getFileSize(), implementation_buffer->getFileName(),
file_segment.getInfoForLog());
}
}
}
else
{
@ -985,15 +958,15 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
bool success = file_segment.reserve(size);
if (success)
{
chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));
chassert(file_segment.getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));

success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, file_segment);
if (success)
{
chassert(file_segment.getCurrentWriteOffset(false) <= file_segment.range().right + 1);
chassert(file_segment.getCurrentWriteOffset() <= file_segment.range().right + 1);
chassert(
/* last_file_segment */file_segments->size() == 1
|| file_segment.getCurrentWriteOffset(false) == implementation_buffer->getFileOffsetOfBufferEnd());
|| file_segment.getCurrentWriteOffset() == implementation_buffer->getFileOffsetOfBufferEnd());

LOG_TEST(log, "Successfully written {} bytes", size);
download_current_segment_succeeded = true;
@ -1035,7 +1008,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
file_offset_of_buffer_end += size;

if (download_current_segment && download_current_segment_succeeded)
chassert(file_segment.getCurrentWriteOffset(false) >= file_offset_of_buffer_end);
chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end);
chassert(file_offset_of_buffer_end <= read_until_position);
}

@ -1084,7 +1057,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
current_read_range.toString(),
file_offset_of_buffer_end,
FileSegment::stateToString(file_segment.state()),
file_segment.getCurrentWriteOffset(false),
file_segment.getCurrentWriteOffset(),
toString(read_type),
read_until_position,
first_offset,
@ -1179,6 +1152,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
file_segments.reset();
implementation_buffer.reset();
initialized = false;
cache_file_reader.reset();

LOG_TEST(log, "Reset state for seek to position {}", new_pos);

@ -1214,6 +1188,7 @@ void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position)
file_segments.reset();
implementation_buffer.reset();
initialized = false;
cache_file_reader.reset();

read_until_position = position;

@ -76,7 +76,7 @@ private:

ImplementationBufferPtr getReadBufferForFileSegment(FileSegment & file_segment);

ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;
ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment);

ImplementationBufferPtr getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_);

@ -110,7 +110,8 @@ private:
ImplementationBufferCreator implementation_buffer_creator;

/// Remote read buffer, which can only be owned by current buffer.
FileSegment::RemoteFileReaderPtr remote_file_reader;
ImplementationBufferPtr remote_file_reader;
ImplementationBufferPtr cache_file_reader;

FileSegmentsHolderPtr file_segments;

@ -70,7 +70,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset

while (size > 0)
{
size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(false);
size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize();
if (available_size == 0)
{
completeFileSegment();
@ -155,7 +155,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s
return;

auto file_segment_range = file_segment.range();
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize(false) - 1;
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1;

FilesystemCacheLogElement elem
{
@ -165,6 +165,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s
.file_segment_range = { file_segment_range.left, file_segment_right_bound },
.requested_range = {},
.cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE,
.file_segment_key = {},
.file_segment_size = file_segment_range.size(),
.read_from_cache_attempted = false,
.read_buffer_id = {},
@ -62,8 +62,16 @@ IOUringReader::IOUringReader(uint32_t entries_)

struct io_uring_params params =
{
    .sq_entries = 0, // filled by the kernel, initializing to silence warning
    .cq_entries = 0, // filled by the kernel, initializing to silence warning
    .flags = 0,
    .sq_thread_cpu = 0, // Unused (IORING_SETUP_SQ_AFF isn't set). Silences warning.
    .sq_thread_idle = 0, // Unused (IORING_SETUP_SQPOLL isn't set). Silences warning.
    .features = 0, // filled by the kernel, initializing to silence warning
    .wq_fd = 0, // Unused (IORING_SETUP_ATTACH_WQ isn't set). Silences warning.
    .resv = {0, 0, 0}, // "The resv array must be initialized to zero."
    .sq_off = {}, // filled by the kernel, initializing to silence warning
    .cq_off = {}, // filled by the kernel, initializing to silence warning
};

int ret = io_uring_queue_init_params(entries_, &ring, &params);
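A minimal, runnable sketch of the zero-initialization pattern the hunk above adopts, assuming liburing is available (the queue depth of 128 is arbitrary):

```cpp
#include <liburing.h>
#include <cstdio>

int main()
{
    // Zero-initializing every field (including resv) keeps the kernel ABI
    // contract and silences -Wmissing-field-initializers, as in the hunk above.
    struct io_uring_params params = {};
    struct io_uring ring;

    int ret = io_uring_queue_init_params(128, &ring, &params);
    if (ret < 0)
        return 1;

    // The kernel fills these in; they are only meaningful after init succeeds.
    std::printf("sq_entries=%u cq_entries=%u\n", params.sq_entries, params.cq_entries);
    io_uring_queue_exit(&ring);
    return 0;
}
```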
@ -69,7 +69,7 @@ private:
static_cast<uint64_t>(blob.BlobSize),
Poco::Timestamp::fromEpochTime(
    std::chrono::duration_cast<std::chrono::seconds>(
        blob.Details.LastModified.time_since_epoch()).count()),
        static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
{}});
}

@ -162,7 +162,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
static_cast<uint64_t>(blob.BlobSize),
Poco::Timestamp::fromEpochTime(
    std::chrono::duration_cast<std::chrono::seconds>(
        blob.Details.LastModified.time_since_epoch()).count()),
        static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
{}});
}

@ -350,7 +350,7 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c
for (const auto & [key, value] : properties.Metadata)
    (*result.attributes)[key] = value;
}
result.last_modified.emplace(properties.LastModified.time_since_epoch().count());
result.last_modified.emplace(static_cast<std::chrono::system_clock::time_point>(properties.LastModified).time_since_epoch().count());
return result;
}
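The cast added in the three hunks above matters because, to my understanding, Azure::DateTime counts from its own epoch (0001-01-01), so calling time_since_epoch() on it directly does not yield Unix time. A hedged sketch of the conversion, assuming the azure-core C++ SDK (the helper name is illustrative):

```cpp
#include <azure/core/datetime.hpp>
#include <chrono>
#include <ctime>

// Normalize an Azure::DateTime to Unix epoch seconds, as the hunks above do:
// the explicit cast to system_clock::time_point rebases the value before
// time_since_epoch() is taken.
std::time_t toUnixSeconds(const Azure::DateTime & last_modified)
{
    auto tp = static_cast<std::chrono::system_clock::time_point>(last_modified);
    return std::chrono::duration_cast<std::chrono::seconds>(tp.time_since_epoch()).count();
}
```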
@ -1,25 +0,0 @@
#pragma once

#include "config.h"

#if USE_AWS_S3

#include <utility>
#include <base/types.h>
#include <IO/S3/PocoHTTPClient.h>
#include <Poco/URI.h>

namespace DB::S3
{
class ProxyConfiguration
{
public:
    virtual ~ProxyConfiguration() = default;
    /// Returns proxy configuration on each HTTP request.
    virtual ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
    virtual void errorReport(const ClientConfigurationPerRequest & config) = 0;
};

}

#endif
@ -1,32 +0,0 @@
#include "ProxyListConfiguration.h"

#if USE_AWS_S3

#include <utility>
#include <Common/logger_useful.h>

namespace DB::S3
{
ProxyListConfiguration::ProxyListConfiguration(std::vector<Poco::URI> proxies_) : proxies(std::move(proxies_)), access_counter(0)
{
}


ClientConfigurationPerRequest ProxyListConfiguration::getConfiguration(const Aws::Http::HttpRequest &)
{
    /// Avoid atomic increment if number of proxies is 1.
    size_t index = proxies.size() > 1 ? (access_counter++) % proxies.size() : 0;

    ClientConfigurationPerRequest cfg;
    cfg.proxy_scheme = Aws::Http::SchemeMapper::FromString(proxies[index].getScheme().c_str());
    cfg.proxy_host = proxies[index].getHost();
    cfg.proxy_port = proxies[index].getPort();

    LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}", proxies[index].toString());

    return cfg;
}

}

#endif
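The deleted ProxyListConfiguration boils down to lock-free round-robin over a fixed list. A self-contained sketch of that pattern (names here are illustrative, not ClickHouse API):

```cpp
#include <atomic>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Round-robin selection as in the deleted ProxyListConfiguration:
// a single atomic counter, no locking.
class RoundRobinList
{
public:
    explicit RoundRobinList(std::vector<std::string> items_) : items(std::move(items_)) {}

    const std::string & next()
    {
        /// Avoid the atomic increment when there is nothing to rotate over.
        size_t index = items.size() > 1 ? access_counter++ % items.size() : 0;
        return items[index];
    }

private:
    const std::vector<std::string> items;
    std::atomic<size_t> access_counter{0};
};

int main()
{
    RoundRobinList proxies({"http://proxy1:3128", "http://proxy2:3128"});
    for (int i = 0; i < 4; ++i)
        std::cout << proxies.next() << '\n'; // proxy1, proxy2, proxy1, proxy2
}
```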
@ -1,32 +0,0 @@
#pragma once

#include "config.h"

#if USE_AWS_S3

#include <atomic> // for std::atomic<size_t>

#include "ProxyConfiguration.h"

namespace DB::S3
{
/**
 * For each request to S3 it chooses a proxy from the specified list using round-robin strategy.
 */
class ProxyListConfiguration : public ProxyConfiguration
{
public:
    explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
    ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
    void errorReport(const ClientConfigurationPerRequest &) override {}

private:
    /// List of configured proxies.
    const std::vector<Poco::URI> proxies;
    /// Access counter to get proxy using round-robin strategy.
    std::atomic<size_t> access_counter;
};

}

#endif
@ -1,42 +0,0 @@
#pragma once

#include "config.h"

#if USE_AWS_S3

#include "ProxyConfiguration.h"

#include <mutex>

namespace DB::S3
{
/**
 * Proxy configuration where the proxy host is obtained each time from the specified endpoint.
 * For each request to S3 it makes a GET request to the specified endpoint URL and reads the proxy host from the response body.
 * The specified scheme and port are added to the obtained proxy host to form the complete proxy URL.
 */
class ProxyResolverConfiguration : public ProxyConfiguration
{
public:
    ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
    ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
    void errorReport(const ClientConfigurationPerRequest & config) override;

private:
    /// Endpoint to obtain a proxy host.
    const Poco::URI endpoint;
    /// Scheme for obtained proxy.
    const String proxy_scheme;
    /// Port for obtained proxy.
    const unsigned proxy_port;

    std::mutex cache_mutex;
    bool cache_valid = false;
    std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
    const std::chrono::seconds cache_ttl{0};
    ClientConfigurationPerRequest cached_config;
};

}

#endif
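Judging by its members (mutex, validity flag, timestamp, TTL), the deleted resolver cached the endpoint's answer for cache_ttl seconds. A hedged sketch of that TTL-gated caching, with a hypothetical fetchFromEndpoint() standing in for the GET request to the resolver endpoint:

```cpp
#include <chrono>
#include <mutex>
#include <string>

// TTL-gated caching as implied by the members of the deleted
// ProxyResolverConfiguration. fetchFromEndpoint() is a hypothetical stand-in.
class CachedResolver
{
public:
    explicit CachedResolver(std::chrono::seconds ttl_) : cache_ttl(ttl_) {}

    std::string resolve()
    {
        std::lock_guard lock(cache_mutex);
        auto now = std::chrono::system_clock::now();
        if (cache_valid && now - cache_timestamp < cache_ttl)
            return cached_host; // fresh enough, skip the network round trip

        cached_host = fetchFromEndpoint(); // hypothetical helper
        cache_timestamp = now;
        cache_valid = true;
        return cached_host;
    }

private:
    std::string fetchFromEndpoint() { return "10.0.0.1"; } // placeholder

    std::mutex cache_mutex;
    bool cache_valid = false;
    std::chrono::system_clock::time_point cache_timestamp;
    const std::chrono::seconds cache_ttl;
    std::string cached_host;
};
```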
@ -5,6 +5,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
#include <Common/Throttler.h>
#include <Common/ProxyConfigurationResolverProvider.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>

@ -17,9 +18,6 @@

#include <Storages/StorageS3Settings.h>
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
#include <Disks/ObjectStorages/S3/ProxyConfiguration.h>
#include <Disks/ObjectStorages/S3/ProxyListConfiguration.h>
#include <Disks/ObjectStorages/S3/ProxyResolverConfiguration.h>
#include <Disks/ObjectStorages/DiskObjectStorageCommon.h>
#include <Disks/DiskLocal.h>
#include <Common/Macros.h>

@ -44,76 +42,15 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000));
}

std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
    const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
{
    auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
    auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
    if (proxy_scheme != "http" && proxy_scheme != "https")
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy resolver config: {}", proxy_scheme);
    auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
    auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);

    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
        endpoint.toString(), proxy_scheme, proxy_port);

    return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}

std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
    const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
{
    std::vector<String> keys;
    proxy_config.keys(prefix, keys);

    std::vector<Poco::URI> proxies;
    for (const auto & key : keys)
        if (startsWith(key, "uri"))
        {
            Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));

            if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy uri: {}", proxy_uri.toString());
            if (proxy_uri.getHost().empty())
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty host in proxy uri: {}", proxy_uri.toString());

            proxies.push_back(proxy_uri);

            LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString());
        }

    if (!proxies.empty())
        return std::make_shared<S3::ProxyListConfiguration>(proxies);

    return nullptr;
}

std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
{
    if (!config.has(prefix + ".proxy"))
        return nullptr;

    std::vector<String> config_keys;
    config.keys(prefix + ".proxy", config_keys);

    if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
    {
        if (resolver_configs > 1)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple proxy resolver configurations aren't allowed");

        return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
    }

    return getProxyListConfiguration(prefix + ".proxy", config);
}


std::unique_ptr<S3::Client> getClient(
    const Poco::Util::AbstractConfiguration & config,
    const String & config_prefix,
    ContextPtr context,
    const S3ObjectStorageSettings & settings)
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);

S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
    config.getString(config_prefix + ".region", ""),
    context->getRemoteHostFilter(),

@ -121,10 +58,9 @@ std::unique_ptr<S3::Client> getClient(
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
/* for_disk_s3 = */ true,
settings.request_settings.get_request_throttler,
settings.request_settings.put_request_throttler);
settings.request_settings.put_request_throttler,
uri.uri.getScheme());

String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (uri.key.back() != '/')
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key);

@ -136,11 +72,14 @@ std::unique_ptr<S3::Client> getClient(
client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000);
client_configuration.wait_on_pool_size_limit = false;

auto proxy_config = getProxyConfiguration(config_prefix, config);
/*
 * Override proxy configuration for backwards compatibility with old configuration format.
 * */
auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(config_prefix, config);
if (proxy_config)
{
    client_configuration.per_request_configuration
        = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
        = [proxy_config]() { return proxy_config->resolve(); };
    client_configuration.error_report
        = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
}
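The replacement drops the per-request argument: the resolver alone decides which proxy to use. A self-contained sketch of that callback wiring under the same assumption (all types here are simplified stand-ins, not the ClickHouse or AWS SDK API):

```cpp
#include <functional>
#include <memory>
#include <string>

// Simplified stand-ins for the types in the hunk above.
struct ProxyConfiguration { std::string host; unsigned port = 0; };

struct ProxyConfigurationResolver
{
    virtual ~ProxyConfigurationResolver() = default;
    virtual ProxyConfiguration resolve() = 0;
    virtual void errorReport(const ProxyConfiguration &) {}
};

struct ClientConfiguration
{
    // After the change the per-request hook no longer receives the HTTP
    // request: the resolver decides on its own, so the callback is nullary.
    std::function<ProxyConfiguration()> per_request_configuration;
    std::function<void(const ProxyConfiguration &)> error_report;
};

void wire(ClientConfiguration & client_configuration, std::shared_ptr<ProxyConfigurationResolver> proxy_config)
{
    client_configuration.per_request_configuration = [proxy_config]() { return proxy_config->resolve(); };
    client_configuration.error_report = [proxy_config](const auto & cfg) { proxy_config->errorReport(cfg); };
}
```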
@ -126,6 +126,86 @@ namespace JSONUtils
return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows);
}

template <const char opening_bracket, const char closing_bracket>
void skipRowForJSONEachRowImpl(ReadBuffer & in)
{
    size_t balance = 0;
    bool quotes = false;
    while (!in.eof())
    {
        if (quotes)
        {
            auto * pos = find_first_symbols<'\\', '"'>(in.position(), in.buffer().end());
            in.position() = pos;

            if (in.position() > in.buffer().end())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug.");
            else if (in.position() == in.buffer().end())
                continue;

            if (*in.position() == '\\')
            {
                ++in.position();
                if (!in.eof())
                    ++in.position();
            }
            else if (*in.position() == '"')
            {
                ++in.position();
                quotes = false;
            }
        }
        else
        {
            auto * pos = find_first_symbols<opening_bracket, closing_bracket, '\\', '"'>(in.position(), in.buffer().end());
            in.position() = pos;

            if (in.position() > in.buffer().end())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug.");
            else if (in.position() == in.buffer().end())
                continue;

            else if (*in.position() == opening_bracket)
            {
                ++balance;
                ++in.position();
            }
            else if (*in.position() == closing_bracket)
            {
                --balance;
                ++in.position();
            }
            else if (*in.position() == '\\')
            {
                ++in.position();
                if (!in.eof())
                    ++in.position();
            }
            else if (*in.position() == '"')
            {
                quotes = true;
                ++in.position();
            }

            if (balance == 0)
                return;
        }
    }

    throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected eof");
}

void skipRowForJSONEachRow(ReadBuffer & in)
{
    return skipRowForJSONEachRowImpl<'{', '}'>(in);
}

void skipRowForJSONCompactEachRow(ReadBuffer & in)
{
    return skipRowForJSONEachRowImpl<'[', ']'>(in);
}

NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info)
{
    skipWhitespaceIfAny(in);

@ -612,8 +692,11 @@ namespace JSONUtils
auto names_and_types = JSONUtils::readMetadata(in);
for (const auto & [name, type] : names_and_types)
{
    if (!header.has(name))
        continue;

    auto header_type = header.getByName(name).type;
    if (header.has(name) && !type->equals(*header_type))
    if (!type->equals(*header_type))
        throw Exception(
            ErrorCodes::INCORRECT_DATA,
            "Type {} of column '{}' from metadata is not the same as type in header {}",
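The skip functions above walk the buffer keeping a bracket balance, treating brackets inside quoted strings and after backslash escapes as inert. A self-contained sketch of the same idea over a std::string_view (illustrative, not the ClickHouse API):

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string_view>

// Return the offset just past the first complete {...} object, skipping
// braces that occur inside quoted strings or after backslash escapes,
// in the spirit of skipRowForJSONEachRowImpl<'{', '}'> above.
size_t skipOneJSONRow(std::string_view data)
{
    size_t balance = 0;
    bool quotes = false;
    for (size_t i = 0; i < data.size(); ++i)
    {
        char c = data[i];
        if (c == '\\')
            ++i; // skip the escaped character
        else if (quotes)
            quotes = (c != '"');
        else if (c == '"')
            quotes = true;
        else if (c == '{')
            ++balance;
        else if (c == '}' && --balance == 0)
            return i + 1;
    }
    throw std::runtime_error("Unexpected eof");
}

int main()
{
    std::string_view rows = R"({"a": "x}y", "b": {"c": 1}}{"d": 2})";
    std::cout << rows.substr(skipOneJSONRow(rows)) << '\n'; // prints {"d": 2}
}
```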
@ -20,6 +20,9 @@ namespace JSONUtils
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows);
std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows);

void skipRowForJSONEachRow(ReadBuffer & in);
void skipRowForJSONCompactEachRow(ReadBuffer & in);

/// Read row in JSONEachRow format and try to determine type for each field.
/// Return list of names and types.
/// If cannot determine the type of some field, return nullptr for it.

@ -47,7 +47,7 @@ bool isRetryableSchemaInferenceError(int code)
ColumnsDescription readSchemaFromFormat(
    const String & format_name,
    const std::optional<FormatSettings> & format_settings,
    ReadBufferIterator & read_buffer_iterator,
    IReadBufferIterator & read_buffer_iterator,
    bool retry,
    ContextPtr & context,
    std::unique_ptr<ReadBuffer> & buf)

@ -77,13 +77,12 @@ try
size_t max_bytes_to_read = format_settings ? format_settings->max_bytes_to_read_for_schema_inference
                                            : context->getSettingsRef().input_format_max_bytes_to_read_for_schema_inference;
size_t iterations = 0;
ColumnsDescription cached_columns;
while (true)
{
    bool is_eof = false;
    try
    {
        buf = read_buffer_iterator(cached_columns);
        buf = read_buffer_iterator.next();
        if (!buf)
            break;
        is_eof = buf->eof();

@ -123,6 +122,9 @@ try
schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings);
schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read);
names_and_types = schema_reader->readSchema();
auto num_rows = schema_reader->readNumberOrRows();
if (num_rows)
    read_buffer_iterator.setNumRowsToLastFile(*num_rows);
break;
}
catch (...)

@ -177,8 +179,8 @@ try
    }
}

if (!cached_columns.empty())
    return cached_columns;
if (auto cached_columns = read_buffer_iterator.getCachedColumns())
    return *cached_columns;

if (names_and_types.empty())
    throw Exception(

@ -229,7 +231,7 @@ catch (Exception & e)
ColumnsDescription readSchemaFromFormat(
    const String & format_name,
    const std::optional<FormatSettings> & format_settings,
    ReadBufferIterator & read_buffer_iterator,
    IReadBufferIterator & read_buffer_iterator,
    bool retry,
    ContextPtr & context)
{

@ -1,15 +1,44 @@
#pragma once

#include <Storages/ColumnsDescription.h>
#include <Storages/Cache/SchemaCache.h>
#include <Formats/FormatFactory.h>
#include <Storages/Cache/SchemaCache.h>
#include <Storages/ColumnsDescription.h>

namespace DB
{

using ReadBufferIterator = std::function<std::unique_ptr<ReadBuffer>(ColumnsDescription &)>;
struct IReadBufferIterator
{
    virtual ~IReadBufferIterator() = default;

    /// Try to determine the schema of the data in the specified format.
    virtual std::unique_ptr<ReadBuffer> next() = 0;

    virtual std::optional<ColumnsDescription> getCachedColumns() { return std::nullopt; }

    virtual void setNumRowsToLastFile(size_t /*num_rows*/) {}
};

struct SingleReadBufferIterator : public IReadBufferIterator
{
public:
    SingleReadBufferIterator(std::unique_ptr<ReadBuffer> buf_) : buf(std::move(buf_))
    {
    }

    std::unique_ptr<ReadBuffer> next() override
    {
        if (done)
            return nullptr;
        done = true;
        return std::move(buf);
    }

private:
    std::unique_ptr<ReadBuffer> buf;
    bool done = false;
};

/// Try to determine the schema of the data and number of rows in data in the specified format.
/// For formats that have an external schema reader, it will
/// use it and won't create a read buffer.
/// For formats that have a schema reader from the data,

@ -22,7 +51,7 @@ using ReadBufferIterator = std::function<std::unique_ptr<ReadBuffer>(ColumnsDesc
ColumnsDescription readSchemaFromFormat(
    const String & format_name,
    const std::optional<FormatSettings> & format_settings,
    ReadBufferIterator & read_buffer_iterator,
    IReadBufferIterator & read_buffer_iterator,
    bool retry,
    ContextPtr & context);

@ -30,12 +59,12 @@ ColumnsDescription readSchemaFromFormat(
ColumnsDescription readSchemaFromFormat(
    const String & format_name,
    const std::optional<FormatSettings> & format_settings,
    ReadBufferIterator & read_buffer_iterator,
    IReadBufferIterator & read_buffer_iterator,
    bool retry,
    ContextPtr & context,
    std::unique_ptr<ReadBuffer> & buf_out);

SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);

}
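IReadBufferIterator replaces the old std::function alias with a small interface, and SingleReadBufferIterator adapts the common one-buffer case to it. A self-contained sketch of this single-shot iterator pattern with generic types (not the ClickHouse API):

```cpp
#include <iostream>
#include <memory>
#include <optional>
#include <string>

// Interface in the spirit of IReadBufferIterator: next() yields sources until
// exhausted; getCachedResult() lets an implementation short-circuit the work.
struct ISourceIterator
{
    virtual ~ISourceIterator() = default;
    virtual std::unique_ptr<std::string> next() = 0;
    virtual std::optional<std::string> getCachedResult() { return std::nullopt; }
    virtual void setNumRowsToLastFile(size_t /*num_rows*/) {}
};

// Adapter for the single-source case, like SingleReadBufferIterator:
// hand out the buffer exactly once, then report exhaustion with nullptr.
struct SingleSourceIterator : ISourceIterator
{
    explicit SingleSourceIterator(std::unique_ptr<std::string> src_) : src(std::move(src_)) {}

    std::unique_ptr<std::string> next() override
    {
        if (done)
            return nullptr;
        done = true;
        return std::move(src);
    }

private:
    std::unique_ptr<std::string> src;
    bool done = false;
};

int main()
{
    SingleSourceIterator it(std::make_unique<std::string>("a UInt32, b String"));
    while (auto src = it.next())   // first call yields the source,
        std::cout << *src << '\n'; // second call returns nullptr and stops
}
```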