Merge branch 'master' into feature_database_and_table_count_limitation_issue56347

This commit is contained in:
Han Fei 2024-06-18 15:52:18 +02:00
commit c32b97d904
324 changed files with 8613 additions and 3448 deletions

View File

@ -48,8 +48,7 @@ At a minimum, the following information should be added (but add more as needed)
- [ ] <!---ci_include_stateful--> Allow: Stateful tests
- [ ] <!---ci_include_integration--> Allow: Integration Tests
- [ ] <!---ci_include_performance--> Allow: Performance tests
- [ ] <!---ci_set_normal_builds--> Allow: Normal Builds
- [ ] <!---ci_set_special_builds--> Allow: Special Builds
- [ ] <!---ci_set_builds--> Allow: All Builds
- [ ] <!---ci_set_non_required--> Allow: All NOT Required Checks
- [ ] <!---batch_0_1--> Allow: batch 1, 2 for multi-batch jobs
- [ ] <!---batch_2_3--> Allow: batch 3, 4, 5, 6 for multi-batch jobs

View File

@ -70,7 +70,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Compatibility check (amd64)
test_name: Compatibility check (release)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
CompatibilityCheckAarch64:
@ -194,7 +194,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (amd64)
test_name: Install packages (release)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
@ -204,7 +204,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (arm64)
test_name: Install packages (aarch64)
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
@ -273,5 +273,5 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py

View File

@ -115,25 +115,16 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
# Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2)
Builds_1_Report:
# Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3
Builds_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
needs: [RunConfig, Builds_1]
needs: [RunConfig, Builds_1, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
Builds_2_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
needs: [RunConfig, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
@ -165,7 +156,7 @@ jobs:
FinishCheck:
if: ${{ !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -173,4 +164,4 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}

View File

@ -99,7 +99,7 @@ jobs:
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
runs-on: [self-hosted, style-checker-aarch64]
steps:
@ -112,4 +112,4 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}

View File

@ -143,29 +143,20 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
# Reports should by run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2)
Builds_1_Report:
# Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
Builds_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
needs: [RunConfig, StyleCheck, Builds_1]
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
needs: [RunConfig, StyleCheck, Builds_1, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
Builds_2_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
needs: [RunConfig, StyleCheck, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
CheckReadyForMerge:
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -181,7 +172,7 @@ jobs:
#
FinishCheck:
if: ${{ !cancelled() }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -191,7 +182,7 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
#############################################################################################
###################################### JEPSEN TESTS #########################################

View File

@ -65,7 +65,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Compatibility check (amd64)
test_name: Compatibility check (release)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
CompatibilityCheckAarch64:
@ -244,7 +244,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (amd64)
test_name: Install packages (release)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
@ -254,7 +254,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Install packages (arm64)
test_name: Install packages (aarch64)
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
@ -496,4 +496,4 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}

View File

@ -32,7 +32,7 @@ constexpr void static_for(F && f)
template <is_enum T>
struct fmt::formatter<T> : fmt::formatter<std::string_view>
{
constexpr auto format(T value, auto& format_context)
constexpr auto format(T value, auto& format_context) const
{
return formatter<string_view>::format(magic_enum::enum_name(value), format_context);
}

View File

@ -12,6 +12,8 @@
#include <base/types.h>
#include <base/unaligned.h>
#include <base/simd.h>
#include <fmt/core.h>
#include <fmt/ostream.h>
#include <city.h>
@ -376,3 +378,5 @@ namespace PackedZeroTraits
std::ostream & operator<<(std::ostream & os, const StringRef & str);
template<> struct fmt::formatter<StringRef> : fmt::ostream_formatter {};

View File

@ -62,7 +62,7 @@ struct fmt::formatter<wide::integer<Bits, Signed>>
}
template <typename FormatContext>
auto format(const wide::integer<Bits, Signed> & value, FormatContext & ctx)
auto format(const wide::integer<Bits, Signed> & value, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}", to_string(value));
}

contrib/aws vendored

@ -1 +1 @@
Subproject commit deeaa9e7c5fe690e3dacc4005d7ecfa7a66a32bb
Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf

contrib/fmtlib vendored

@ -1 +1 @@
Subproject commit b6f4ceaed0a0a24ccf575fab6c56dd50ccf6f1a9
Subproject commit a33701196adfad74917046096bf5a2aa0ab0bb50

View File

@ -13,7 +13,6 @@ set (SRCS
${FMT_SOURCE_DIR}/include/fmt/core.h
${FMT_SOURCE_DIR}/include/fmt/format.h
${FMT_SOURCE_DIR}/include/fmt/format-inl.h
${FMT_SOURCE_DIR}/include/fmt/locale.h
${FMT_SOURCE_DIR}/include/fmt/os.h
${FMT_SOURCE_DIR}/include/fmt/ostream.h
${FMT_SOURCE_DIR}/include/fmt/printf.h

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.5.2.34"
ARG VERSION="24.5.3.5"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.5.2.34"
ARG VERSION="24.5.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.5.2.34"
ARG VERSION="24.5.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off

View File

@ -208,6 +208,7 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue

View File

@ -20,6 +20,7 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue

View File

@ -89,10 +89,6 @@ function configure()
# since we run clickhouse from root
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes

View File

@ -0,0 +1,40 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.15.35-lts (060ff8e813a) FIXME as compared to v23.8.14.6-lts (967e51c1d6b)
#### Build/Testing/Packaging Improvement
* Backported in [#63621](https://github.com/ClickHouse/ClickHouse/issues/63621): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#65153](https://github.com/ClickHouse/ClickHouse/issues/65153): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#64422](https://github.com/ClickHouse/ClickHouse/issues/64422): Fixes [#59989](https://github.com/ClickHouse/ClickHouse/issues/59989): runs init scripts when force-enabled or when no database exists, rather than the inverse. [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)).
* Backported in [#64016](https://github.com/ClickHouse/ClickHouse/issues/64016): Fix "Invalid storage definition in metadata file" for parameterized views. [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#63456](https://github.com/ClickHouse/ClickHouse/issues/63456): Fix the issue where the function `addDays` (and similar functions) reports an error when the first parameter is `DateTime64`. [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
* Backported in [#63289](https://github.com/ClickHouse/ClickHouse/issues/63289): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63512](https://github.com/ClickHouse/ClickHouse/issues/63512): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
* Backported in [#63902](https://github.com/ClickHouse/ClickHouse/issues/63902): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
* Backported in [#64104](https://github.com/ClickHouse/ClickHouse/issues/64104): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64265](https://github.com/ClickHouse/ClickHouse/issues/64265): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64867](https://github.com/ClickHouse/ClickHouse/issues/64867): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via the HTTP protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
#### NO CL CATEGORY
* Backported in [#63704](https://github.com/ClickHouse/ClickHouse/issues/63704):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL ENTRY
* NO CL ENTRY: 'Installation test has wrong check_state'. [#63994](https://github.com/ClickHouse/ClickHouse/pull/63994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#63343](https://github.com/ClickHouse/ClickHouse/issues/63343): The commit url has different pattern. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#63965](https://github.com/ClickHouse/ClickHouse/issues/63965): fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#64043](https://github.com/ClickHouse/ClickHouse/issues/64043): Do not create new release in release branch automatically. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Pin requests to fix the integration tests. [#65183](https://github.com/ClickHouse/ClickHouse/pull/65183) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.3.5-stable (e0eb66f8e17) FIXME as compared to v24.5.2.34-stable (45589aeee49)
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65227](https://github.com/ClickHouse/ClickHouse/issues/65227): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#65219](https://github.com/ClickHouse/ClickHouse/issues/65219): Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -1490,6 +1490,8 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000
- [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`.
- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`.
- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `true`.
- [output_format_pretty_display_footer_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names) - Display column names in the footer if table contains many rows. Default value - `true`.
- [output_format_pretty_display_footer_column_names_min_rows](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names_min_rows) - Sets the minimum number of rows for which a footer will be displayed if [output_format_pretty_display_footer_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names) is enabled. Default value - 50.
## RowBinary {#rowbinary}

View File

@ -508,7 +508,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2's regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `http_response_headers`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
- `query` — use with `predefined_query_handler` type, executes query when the handler is called.
@ -519,6 +519,8 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `http_response_headers` — use with any type, response headers map. Could be used to set content type as well.
- `response_content` — use with `static` type, the response content sent to the client; when using the prefix file:// or config://, the content is read from the file or configuration and sent to the client.
Next are the configuration methods for different `type`.
@ -616,6 +618,33 @@ Return a message.
<type>static</type>
<status>402</status>
<content_type>text/html; charset=UTF-8</content_type>
<http_response_headers>
<Content-Language>en</Content-Language>
<X-My-Custom-Header>43</X-My-Custom-Header>
</http_response_headers>
<response_content>Say Hi!</response_content>
</handler>
</rule>
<defaults/>
</http_handlers>
```
`http_response_headers` could be used to set content type instead of `content_type`.
``` xml
<http_handlers>
<rule>
<methods>GET</methods>
<headers><XXX>xxx</XXX></headers>
<url>/hi</url>
<handler>
<type>static</type>
<status>402</status>
<http_response_headers>
<Content-Type>text/html; charset=UTF-8</Content-Type>
<Content-Language>en</Content-Language>
<X-My-Custom-Header>43</X-My-Custom-Header>
</http_response_headers>
<response_content>Say Hi!</response_content>
</handler>
</rule>
@ -696,6 +725,9 @@ Find the content from the file send to client.
<handler>
<type>static</type>
<content_type>text/html; charset=UTF-8</content_type>
<http_response_headers>
<ETag>737060cd8c284d8af7ad3082f209582d</ETag>
</http_response_headers>
<response_content>file:///absolute_path_file.html</response_content>
</handler>
</rule>
@ -706,6 +738,9 @@ Find the content from the file send to client.
<handler>
<type>static</type>
<content_type>text/html; charset=UTF-8</content_type>
<http_response_headers>
<ETag>737060cd8c284d8af7ad3082f209582d</ETag>
</http_response_headers>
<response_content>file://./relative_path_file.html</response_content>
</handler>
</rule>

View File

@ -59,10 +59,10 @@ For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/je
If that's the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool.
```
git clone https://github.com/gimli-rs/addr2line
git clone https://github.com/gimli-rs/addr2line.git --depth=1 --branch=0.23.0
cd addr2line
cargo b --examples -r
cp ./target/release/examples/addr2line path/to/current/addr2line
cargo build --features bin --release
cp ./target/release/addr2line path/to/current/addr2line
```
:::

View File

@ -3100,3 +3100,21 @@ This setting is only necessary for the migration period and will become obsolete
Type: Bool
Default: 1
## merge_workload {#merge_workload}
Used to regulate how resources are utilized and shared between merges and other workloads. The specified value is used as the `workload` setting value for all background merges. It can be overridden by a merge tree setting.
Default value: "default"
**See Also**
- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
## mutation_workload {#mutation_workload}
Used to regulate how resources are utilized and shared between mutations and other workloads. The specified value is used as the `workload` setting value for all background mutations. It can be overridden by a merge tree setting.
Default value: "default"
**See Also**
- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
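For context, a hedged way to inspect the effective server-level values, assuming they are exposed through `system.server_settings` on this build:
```sql
-- Illustrative query only: check the current merge/mutation workload settings.
SELECT name, value, changed
FROM system.server_settings
WHERE name IN ('merge_workload', 'mutation_workload');
```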

View File

@ -974,6 +974,24 @@ Default value: false
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
## merge_workload
Used to regulate how resources are utilized and shared between merges and other workloads. The specified value is used as the `workload` setting value for background merges of this table. If not specified (an empty string), the server setting `merge_workload` is used instead.
Default value: an empty string
**See Also**
- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
## mutation_workload
Used to regulate how resources are utilized and shared between mutations and other workloads. The specified value is used as the `workload` setting value for background mutations of this table. If not specified (an empty string), the server setting `mutation_workload` is used instead.
Default value: an empty string
**See Also**
- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
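A minimal sketch of overriding these settings per table; the table and workload names are illustrative, and the `MODIFY SETTING` form assumes the usual MergeTree setting syntax applies to them:
```sql
-- Hypothetical table whose background merges and mutations get dedicated workloads.
CREATE TABLE t_workload_example
(
    key UInt64,
    value String
)
ENGINE = MergeTree
ORDER BY key
SETTINGS merge_workload = 'production_merges', mutation_workload = 'production_mutations';

-- Adjust later without recreating the table (assumed standard syntax for MergeTree settings).
ALTER TABLE t_workload_example MODIFY SETTING merge_workload = 'development_merges';
```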
### optimize_row_order
Controls whether the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.

View File

@ -1706,6 +1706,43 @@ Result:
└────────────┘
```
## output_format_pretty_display_footer_column_names
Display column names in the footer if there are many table rows.
Possible values:
- 0 — No column names are displayed in the footer.
- 1 — Column names are displayed in the footer if row count is greater than or equal to the threshold value set by [output_format_pretty_display_footer_column_names_min_rows](#output_format_pretty_display_footer_column_names_min_rows) (50 by default).
Default value: `1`.
**Example**
Query:
```sql
SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 1000);
```
Result:
```response
┌─number─┬─toTypeName(number)─┐
1. │ 0 │ UInt64 │
2. │ 1 │ UInt64 │
3. │ 2 │ UInt64 │
...
999. │ 998 │ UInt64 │
1000. │ 999 │ UInt64 │
└─number─┴─toTypeName(number)─┘
```
## output_format_pretty_display_footer_column_names_min_rows
Sets the minimum number of rows for which a footer with column names will be displayed if setting [output_format_pretty_display_footer_column_names](#output_format_pretty_display_footer_column_names) is enabled.
Default value: `50`.
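A small sketch of lowering the threshold for a single query (the values are illustrative; the footer only appears in Pretty* output formats):
```sql
-- With the threshold lowered to 10, the 20-row result below should repeat the
-- column names in the footer; with the default of 50 it would not.
SELECT number, toTypeName(number)
FROM system.numbers
LIMIT 20
SETTINGS output_format_pretty_display_footer_column_names_min_rows = 10
FORMAT PrettyCompact;
```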
## Template format settings {#template-format-settings}
### format_template_resultset {#format_template_resultset}

View File

@ -639,6 +639,10 @@ An internal metric of the low-level memory allocator (jemalloc). See https://jem
An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html
### jemalloc.prof.active
An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html
**See Also**
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
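For context, a hedged query sketch that lists the jemalloc-related entries, assuming these metrics are exposed through `system.asynchronous_metrics`:
```sql
-- Illustrative only: show all jemalloc metrics currently reported by the server.
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric LIKE 'jemalloc%'
ORDER BY metric;
```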

View File

@ -47,6 +47,8 @@ Example:
Queries can be marked with the `workload` setting to distinguish different workloads. If `workload` is not set, then the value "default" is used. Note that another value can be specified using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting.
It is possible to assign a `workload` setting for background activities. Merges and mutations use the `merge_workload` and `mutation_workload` server settings respectively. These values can also be overridden for specific tables using the `merge_workload` and `mutation_workload` merge tree settings.
Let's consider an example of a system with two different workloads: "production" and "development".
```sql
@ -151,6 +153,9 @@ Example:
</clickhouse>
```
## See also
- [system.scheduler](/docs/en/operations/system-tables/scheduler.md)
- [merge_workload](/docs/en/operations/settings/merge-tree-settings.md#merge_workload) merge tree setting
- [merge_workload](/docs/en/operations/server-configuration-parameters/settings.md#merge_workload) global server setting
- [mutation_workload](/docs/en/operations/settings/merge-tree-settings.md#mutation_workload) merge tree setting
- [mutation_workload](/docs/en/operations/server-configuration-parameters/settings.md#mutation_workload) global server setting

View File

@ -7,33 +7,43 @@ sidebar_label: Float32, Float64
# Float32, Float64
:::note
If you need accurate calculations, in particular if you work with financial or business data requiring a high precision you should consider using Decimal instead. Floats might lead to inaccurate results as illustrated below:
If you need accurate calculations, in particular if you work with financial or business data requiring a high precision, you should consider using [Decimal](../data-types/decimal.md) instead.
```
[Floating Point Numbers](https://en.wikipedia.org/wiki/IEEE_754) might lead to inaccurate results as illustrated below:
```sql
CREATE TABLE IF NOT EXISTS float_vs_decimal
(
my_float Float64,
my_decimal Decimal64(3)
)Engine=MergeTree ORDER BY tuple()
INSERT INTO float_vs_decimal SELECT round(randCanonical(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal
)
Engine=MergeTree
ORDER BY tuple();
# Generate 1 000 000 random numbers with 3 decimal places and store them as a float and as a decimal
INSERT INTO float_vs_decimal SELECT round(randCanonical(), 3) AS res, res FROM system.numbers LIMIT 1000000;
```
```
SELECT sum(my_float), sum(my_decimal) FROM float_vs_decimal;
> 500279.56300000014 500279.563
┌──────sum(my_float)─┬─sum(my_decimal)─┐
│ 499693.60500000004 │ 499693.605 │
└────────────────────┴─────────────────┘
SELECT sumKahan(my_float), sumKahan(my_decimal) FROM float_vs_decimal;
> 500279.563 500279.563
┌─sumKahan(my_float)─┬─sumKahan(my_decimal)─┐
│ 499693.605 │ 499693.605 │
└────────────────────┴──────────────────────┘
```
:::
[Floating point numbers](https://en.wikipedia.org/wiki/IEEE_754).
Types are equivalent to types of C:
The equivalent types in ClickHouse and in C are given below:
- `Float32``float`.
- `Float64``double`.
Aliases:
Float types in ClickHouse have the following aliases:
- `Float32``FLOAT`, `REAL`, `SINGLE`.
- `Float64``DOUBLE`, `DOUBLE PRECISION`.

View File

@ -1136,16 +1136,136 @@ SELECT tryBase58Decode('3dc8KtHrwM') as res, tryBase58Decode('invalid') as res_i
## base64Encode
Encodes a String or FixedString as base64.
Encodes a String or FixedString as base64, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4).
Alias: `TO_BASE64`.
**Syntax**
```sql
base64Encode(plaintext)
```
**Arguments**
- `plaintext` — [String](../data-types/string.md) column or constant.
**Returned value**
- A string containing the encoded value of the argument.
**Example**
``` sql
SELECT base64Encode('clickhouse');
```
Result:
```result
┌─base64Encode('clickhouse')─┐
│ Y2xpY2tob3VzZQ== │
└────────────────────────────┘
```
## base64UrlEncode
Encodes a URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5).
**Syntax**
```sql
base64UrlEncode(url)
```
**Arguments**
- `url` — [String](../data-types/string.md) column or constant.
**Returned value**
- A string containing the encoded value of the argument.
**Example**
``` sql
SELECT base64UrlEncode('https://clickhouse.com');
```
Result:
```result
┌─base64UrlEncode('https://clickhouse.com')─┐
│ aHR0cDovL2NsaWNraG91c2UuY29t │
└───────────────────────────────────────────┘
```
## base64Decode
Decodes a base64-encoded String or FixedString. Throws an exception in case of error.
Accepts a String and decodes it from base64, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error.
Alias: `FROM_BASE64`.
**Syntax**
```sql
base64Decode(encoded)
```
**Arguments**
- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, an exception is thrown.
**Returned value**
- A string containing the decoded value of the argument.
**Example**
``` sql
SELECT base64Decode('Y2xpY2tob3VzZQ==');
```
Result:
```result
┌─base64Decode('Y2xpY2tob3VzZQ==')─┐
│ clickhouse │
└──────────────────────────────────┘
```
## base64UrlDecode
Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error.
**Syntax**
```sql
base64UrlDecode(encodedUrl)
```
**Arguments**
- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
**Returned value**
- A string containing the decoded value of the argument.
**Example**
``` sql
SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
```
Result:
```result
┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
│ https://clickhouse.com │
└─────────────────────────────────────────────────┘
```
## tryBase64Decode
Like `base64Decode` but returns an empty string in case of error.
@ -1156,9 +1276,13 @@ Like `base64Decode` but returns an empty string in case of error.
tryBase64Decode(encoded)
```
**Parameters**
**Arguments**
- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.
- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string.
**Returned value**
- A string containing the decoded value of the argument.
**Examples**
@ -1169,9 +1293,41 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res
```
```response
┌─res─────┬─res_invalid─┐
│ Encoded │ │
└─────────┴─────────────┘
┌─res────────┬─res_invalid─┐
│ clickhouse │ │
└────────────┴─────────────┘
```
## tryBase64UrlDecode
Like `base64UrlDecode` but returns an empty string in case of error.
**Syntax**
```sql
tryBase64UrlDecode(encodedUrl)
```
**Parameters**
- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
**Returned value**
- A string containing the decoded value of the argument.
**Examples**
Query:
```sql
SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
```
```response
┌─res────────────────────┬─res_invalid─┐
│ https://clickhouse.com │ │
└────────────────────────┴─────────────┘
```
## endsWith {#endswith}

View File

@ -414,6 +414,8 @@ $ curl -v 'http://localhost:8123/predefined_query'
- `content_type` — used with all types, returns the [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `http_response_headers` — used with all types to add custom headers to the response. Can also be used to set the `Content-Type` header instead of `content_type`.
- `response_content` — used with the `static` type; the response content sent to the client. When using the file:// or config:// prefix, the content is read from the file or configuration and sent to the client.
The configuration methods for the different types are described below.
@ -509,6 +511,33 @@ max_final_threads 2
<type>static</type>
<status>402</status>
<content_type>text/html; charset=UTF-8</content_type>
<http_response_headers>
<Content-Language>en</Content-Language>
<X-My-Custom-Header>43</X-My-Custom-Header>
</http_response_headers>
<response_content>Say Hi!</response_content>
</handler>
</rule>
<defaults/>
</http_handlers>
```
`http_response_headers` can also be used to set `Content-Type` instead of `content_type`.
``` xml
<http_handlers>
<rule>
<methods>GET</methods>
<headers><XXX>xxx</XXX></headers>
<url>/hi</url>
<handler>
<type>static</type>
<status>402</status>
<http_response_headers>
<Content-Type>text/html; charset=UTF-8</Content-Type>
<Content-Language>en</Content-Language>
<X-My-Custom-Header>43</X-My-Custom-Header>
</http_response_headers>
<response_content>Say Hi!</response_content>
</handler>
</rule>
@ -589,6 +618,9 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
<handler>
<type>static</type>
<content_type>text/html; charset=UTF-8</content_type>
<http_response_headers>
<ETag>737060cd8c284d8af7ad3082f209582d</ETag>
</http_response_headers>
<response_content>file:///absolute_path_file.html</response_content>
</handler>
</rule>
@ -599,6 +631,9 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
<handler>
<type>static</type>
<content_type>text/html; charset=UTF-8</content_type>
<http_response_headers>
<ETag>737060cd8c284d8af7ad3082f209582d</ETag>
</http_response_headers>
<response_content>file://./relative_path_file.html</response_content>
</handler>
</rule>

View File

@ -538,16 +538,28 @@ SELECT base58Decode('3dc8KtHrwM');
Alias: `TO_BASE64`.
## base64UrlEncode(s)
Encodes a URL (String or FixedString) into its base64 representation according to [RFC 4648](https://tools.ietf.org/html/rfc4648).
## base64Decode(s) {#base64decode}
Decodes the base64 representation s back into the original string. Throws an exception if decoding is not possible.
Alias: `FROM_BASE64`.
## base64UrlDecode(s)
Decodes a base64-encoded URL back into the original string according to [RFC 4648](https://tools.ietf.org/html/rfc4648). Throws an exception if decoding is not possible.
## tryBase64Decode(s) {#trybase64decode}
Same as base64Decode, but returns an empty string if decoding is not possible.
## tryBase64UrlDecode(s)
Same as base64UrlDecode, but returns an empty string if decoding is not possible.
## endsWith(s, suffix) {#endswith}
Returns 1 if the string ends with the specified suffix, and 0 otherwise.

View File

@ -154,8 +154,6 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Storages/StorageS3Settings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp

View File

@ -361,9 +361,10 @@ try
}
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 100),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
/// We need to have a sufficient number of threads for connections + nuraft workers + keeper workers; 1000 is an estimate
std::min(1000U, config().getUInt("max_thread_pool_size", 1000)),
config().getUInt("max_thread_pool_free_size", 100),
config().getUInt("thread_pool_queue_size", 1000)
);
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
SCOPE_EXIT({

View File

@ -983,6 +983,18 @@ try
}
}
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
/// Check that the process user id matches the owner of the data.
assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });
global_context->setPath(path_str);
StatusFile status{path / "status", StatusFile::write_full_info};
ServerUUID::load(path / "uuid", log);
zkutil::validateZooKeeperConfig(config());
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
@ -994,7 +1006,7 @@ try
ConfigProcessor config_processor(config_path);
loaded_config = config_processor.loadConfigWithZooKeeperIncludes(
main_config_zk_node_cache, main_config_zk_changed_event, /* fallback_to_preprocessed = */ true);
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
config_processor.savePreprocessedConfig(loaded_config, path_str);
config().removeConfiguration(old_configuration.get());
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
global_context->setConfig(loaded_config.configuration);
@ -1128,19 +1140,6 @@ try
global_context->setRemoteHostFilter(config());
global_context->setHTTPHeaderFilter(config());
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
std::string default_database = server_settings.default_database.toString();
/// Check that the process user id matches the owner of the data.
assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });
global_context->setPath(path_str);
StatusFile status{path / "status", StatusFile::write_full_info};
ServerUUID::load(path / "uuid", log);
/// Try to increase limit on number of open files.
{
rlimit rlim;
@ -1610,6 +1609,10 @@ try
0, // We don't need any threads one all the parts will be deleted
new_server_settings.max_parts_cleaning_thread_pool_size);
global_context->setMergeWorkload(new_server_settings.merge_workload);
global_context->setMutationWorkload(new_server_settings.mutation_workload);
if (config->has("resources"))
{
global_context->getResourceManager()->updateConfiguration(*config);
@ -1932,6 +1935,7 @@ try
/// Set current database name before loading tables and databases because
/// system logs may copy global context.
std::string default_database = server_settings.default_database.toString();
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading metadata from {}", path_str);

View File

@ -371,7 +371,7 @@
<!-- Enables asynchronous loading of databases and tables to speed up server startup.
Queries to a not-yet-loaded entity will be blocked until loading finishes.
-->
<!-- <async_load_databases>true</async_load_databases> -->
<async_load_databases>true</async_load_databases>
<!-- On memory constrained environments you may have to set this to value larger than 1.
-->
@ -1396,6 +1396,14 @@
<!-- <host_name>replica</host_name> -->
</distributed_ddl>
<!-- Used to regulate how resources are utilized and shared between merges, mutations and other workloads.
The specified value is used as the `workload` setting value for background merges and mutations.
-->
<!--
<merge_workload>merges_and_mutations</merge_workload>
<mutation_workload>merges_and_mutations</mutation_workload>
-->
<!-- Settings to fine-tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
<!--
<merge_tree>

View File

@ -31,6 +31,7 @@ namespace DB
{
namespace ErrorCodes
{
extern const int AUTHENTICATION_FAILED;
extern const int SUPPORT_IS_DISABLED;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
@ -90,8 +91,10 @@ bool AuthenticationData::Util::checkPasswordBcrypt(std::string_view password [[m
{
#if USE_BCRYPT
int ret = bcrypt_checkpw(password.data(), reinterpret_cast<const char *>(password_bcrypt.data()));
/// Before 24.6 we didn't validate hashes on creation, so it could be that the stored hash is invalid
/// and it could not be decoded by the library
if (ret == -1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "BCrypt library failed: bcrypt_checkpw returned {}", ret);
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Internal failure decoding Bcrypt hash");
return (ret == 0);
#else
throw Exception(
@ -230,6 +233,17 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Password hash for the 'BCRYPT_PASSWORD' authentication type has length {} "
"but must be 59 or 60 bytes.", hash.size());
auto resized = hash;
resized.resize(64);
#if USE_BCRYPT
/// Verify that it is a valid hash
int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
if (ret == -1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
#endif
password_hash = hash;
password_hash.resize(64);
return;
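A hedged SQL sketch of where this change is visible to users; the user names, password, and hash are placeholders, and the statements are assumptions based on the documented `bcrypt_password`/`bcrypt_hash` authentication types:
```sql
-- Letting the server hash the password keeps working as before.
CREATE USER demo_user IDENTIFIED WITH bcrypt_password BY 'correct horse battery staple';

-- Supplying a pre-computed hash that the bcrypt library cannot decode is now
-- rejected with BAD_ARGUMENTS at CREATE time; an invalid stored hash found at
-- login now fails with AUTHENTICATION_FAILED instead of LOGICAL_ERROR.
-- (placeholder value below, not a real bcrypt hash)
-- CREATE USER demo_user2 IDENTIFIED WITH bcrypt_hash BY '$2a$12$................................';
```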

View File

@ -0,0 +1,265 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Core/ServerSettings.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
namespace
{
struct GroupConcatDataBase
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
void checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
void insertChar(const char * str, UInt64 str_size, Arena * arena)
{
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
};
struct GroupConcatData : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
/// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
UInt64 getString(size_t i) const { return offsets[i * 2]; }
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
};
template <bool has_limit>
class GroupConcatImpl final
: public IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>
{
static constexpr auto name = "groupConcat";
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, serialization(this->argument_types[0]->getDefaultSerialization())
, limit(limit_)
, delimiter(delimiter_)
{
}
String getName() const override { return name; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_data = this->data(place);
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insert(columns[0], serialization, row_num, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (rhs_data.data_size == 0)
return;
if constexpr (has_limit)
{
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i)
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
cur_data.num_rows++;
cur_data.offsets.push_back(cur_data.data_size, arena);
}
}
else
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & cur_data = this->data(place);
writeVarUInt(cur_data.data_size, buf);
writeVarUInt(cur_data.allocated_size, buf);
buf.write(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
writeVarUInt(cur_data.num_rows, buf);
for (const auto & offset : cur_data.offsets)
writeVarUInt(offset, buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
auto & cur_data = this->data(place);
readVarUInt(cur_data.data_size, buf);
readVarUInt(cur_data.allocated_size, buf);
buf.readStrict(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
readVarUInt(cur_data.num_rows, buf);
cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
for (auto & offset : cur_data.offsets)
readVarUInt(offset, buf);
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & cur_data = this->data(place);
if (cur_data.data_size == 0)
{
auto column_nullable = IColumn::mutate(makeNullable(to.getPtr()));
column_nullable->insertDefault();
return;
}
auto & column_string = assert_cast<ColumnString &>(to);
column_string.insertData(cur_data.data, cur_data.data_size);
}
bool allocatesMemoryInArena() const override { return true; }
};
AggregateFunctionPtr createAggregateFunctionGroupConcat(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertUnary(name, argument_types);
bool has_limit = false;
UInt64 limit = 0;
String delimiter;
if (parameters.size() > 2)
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Incorrect number of parameters for aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size());
if (!parameters.empty())
{
auto type = parameters[0].getType();
if (type != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);
delimiter = parameters[0].get<String>();
}
if (parameters.size() == 2)
{
auto type = parameters[1].getType();
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);
if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());
has_limit = true;
limit = parameters[1].get<UInt64>();
}
if (has_limit)
return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types[0], parameters, limit, delimiter);
else
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
}
}
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
}
}
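A hedged usage sketch of the new aggregate function, inferred from the registration code above (the first parameter is the delimiter, the optional second one is a row limit):
```sql
-- Concatenate the textual representation of each value, separated by ', '.
SELECT groupConcat(', ')(number) FROM numbers(5);    -- expected: 0, 1, 2, 3, 4

-- With the optional limit, only the first N rows are concatenated.
SELECT groupConcat(', ', 3)(number) FROM numbers(5); -- expected: 0, 1, 2

-- The case-insensitive alias registered above also works.
SELECT group_concat(',')(toString(number)) FROM numbers(3);
```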

View File

@ -19,6 +19,7 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &);
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
@ -120,6 +121,7 @@ void registerAggregateFunctions()
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionGroupArrayIntersect(factory);
registerAggregateFunctionGroupConcat(factory);
registerAggregateFunctionsQuantile(factory);
registerAggregateFunctionsQuantileDeterministic(factory);
registerAggregateFunctionsQuantileExact(factory);

View File

@ -406,7 +406,7 @@ struct fmt::formatter<DB::Identifier>
}
template <typename FormatContext>
auto format(const DB::Identifier & identifier, FormatContext & ctx)
auto format(const DB::Identifier & identifier, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}", identifier.getFullName());
}
@ -428,7 +428,7 @@ struct fmt::formatter<DB::IdentifierView>
}
template <typename FormatContext>
auto format(const DB::IdentifierView & identifier_view, FormatContext & ctx)
auto format(const DB::IdentifierView & identifier_view, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}", identifier_view.getFullName());
}

View File

@ -41,9 +41,9 @@ public:
return;
bool replaced_argument = false;
auto & uniq_function_arguments_nodes = function_node->getArguments().getNodes();
auto replaced_uniq_function_arguments_nodes = function_node->getArguments().getNodes();
for (auto & uniq_function_argument_node : uniq_function_arguments_nodes)
for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes)
{
auto * uniq_function_argument_node_typed = uniq_function_argument_node->as<FunctionNode>();
if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction())
@ -67,12 +67,10 @@ public:
if (!replaced_argument)
return;
const auto & function_node_argument_nodes = function_node->getArguments().getNodes();
DataTypes argument_types;
argument_types.reserve(function_node_argument_nodes.size());
argument_types.reserve(replaced_uniq_function_arguments_nodes.size());
for (const auto & function_node_argument : function_node_argument_nodes)
for (const auto & function_node_argument : replaced_uniq_function_arguments_nodes)
argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties;
@ -83,6 +81,11 @@ public:
function_node->getAggregateFunction()->getParameters(),
properties);
/// uniqCombined returns nullable with nullable arguments so the result type might change which breaks the pass
if (!aggregate_function->getResultType()->equals(*function_node->getAggregateFunction()->getResultType()))
return;
function_node->getArguments().getNodes() = replaced_uniq_function_arguments_nodes;
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
}
};

View File

@ -985,18 +985,18 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
{
result_aggregate_function_name = settings.count_distinct_implementation;
}
else if (aggregate_function_name_lowercase == "countdistinctif" || aggregate_function_name_lowercase == "countifdistinct")
else if (aggregate_function_name_lowercase == "countifdistinct" ||
(settings.rewrite_count_distinct_if_with_count_distinct_implementation && aggregate_function_name_lowercase == "countdistinctif"))
{
result_aggregate_function_name = settings.count_distinct_implementation;
result_aggregate_function_name += "If";
}
/// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
if (result_aggregate_function_name.ends_with("ifdistinct"))
else if (aggregate_function_name_lowercase.ends_with("ifdistinct"))
{
/// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
size_t prefix_length = result_aggregate_function_name.size() - strlen("ifdistinct");
result_aggregate_function_name = result_aggregate_function_name.substr(0, prefix_length) + "DistinctIf";
}
}
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
if (need_add_or_null)

View File

@ -88,14 +88,10 @@ namespace
std::move(headers),
S3::CredentialsConfiguration
{
settings.auth_settings.use_environment_credentials.value_or(
context->getConfigRef().getBool("s3.use_environment_credentials", true)),
settings.auth_settings.use_insecure_imds_request.value_or(
context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
settings.auth_settings.expiration_window_seconds.value_or(
context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
settings.auth_settings.no_sign_request.value_or(
context->getConfigRef().getBool("s3.no_sign_request", false)),
settings.auth_settings.use_environment_credentials,
settings.auth_settings.use_insecure_imds_request,
settings.auth_settings.expiration_window_seconds,
settings.auth_settings.no_sign_request
});
}
@ -131,12 +127,18 @@ BackupReaderS3::BackupReaderS3(
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3"))
, s3_uri(s3_uri_)
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
{
auto & request_settings = s3_settings.request_settings;
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
request_settings.allow_native_copy = allow_s3_native_copy;
s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef());
if (auto endpoint_settings = context_->getStorageS3Settings().getSettings(
s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
{
s3_settings.updateIfChanged(*endpoint_settings);
}
s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true);
s3_settings.request_settings.allow_native_copy = allow_s3_native_copy;
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
if (auto blob_storage_system_log = context_->getBlobStorageLog())
@ -223,13 +225,19 @@ BackupWriterS3::BackupWriterS3(
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3"))
, s3_uri(s3_uri_)
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
{
auto & request_settings = s3_settings.request_settings;
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
request_settings.allow_native_copy = allow_s3_native_copy;
request_settings.setStorageClassName(storage_class_name);
s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef());
if (auto endpoint_settings = context_->getStorageS3Settings().getSettings(
s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
{
s3_settings.updateIfChanged(*endpoint_settings);
}
s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true);
s3_settings.request_settings.allow_native_copy = allow_s3_native_copy;
s3_settings.request_settings.storage_class_name = storage_class_name;
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
if (auto blob_storage_system_log = context_->getBlobStorageLog())
{

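These constructors now layer settings instead of spelling defaults at every call site: values loaded from the server config, then per-endpoint overrides, then changed query settings, then explicit arguments such as allow_native_copy. The underlying simplification is that the auth fields are no longer std::optional, so callers stop repeating defaults through value_or(). A minimal sketch of that change, with hypothetical struct and field shapes rather than the real S3::AuthSettings:

#include <optional>

// Hypothetical before/after shapes, not the real S3::AuthSettings definition.
struct OldAuthSettings { std::optional<bool> use_environment_credentials; };
struct NewAuthSettings { bool use_environment_credentials = true; };  // default lives in the struct

// Before: every caller had to repeat the default.
bool oldStyle(const OldAuthSettings & s) { return s.use_environment_credentials.value_or(true); }

// After: callers just read the field; later layers overwrite it only if they actually set the key.
bool newStyle(const NewAuthSettings & s) { return s.use_environment_credentials; }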
View File

@ -7,7 +7,7 @@
#include <Common/Logger.h>
#include <Disks/DiskType.h>
#include <IO/S3Common.h>
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <Interpreters/Context_fwd.h>
#include <IO/S3/BlobStorageLogWriter.h>

View File

@ -44,13 +44,12 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/parseKQLQuery.h>
#include <Processors/Formats/Impl/NullFormat.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>

View File

@ -112,7 +112,7 @@ struct fmt::formatter<DB::TestHint::ErrorVector>
}
template <typename FormatContext>
auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx)
auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx) const
{
if (ErrorVector.empty())
return fmt::format_to(ctx.out(), "{}", 0);

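This const qualifier, repeated for several formatter specializations further down, matches newer fmt releases, which invoke format() through a const formatter object. A minimal standalone specialization in that style, using a made-up Point type:

#include <fmt/core.h>

struct Point { int x = 0; int y = 0; };

template <>
struct fmt::formatter<Point>
{
    // Accept only the empty spec "{}" in this sketch.
    constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); }

    // Must be const-qualified: fmt calls format() on a const formatter.
    template <typename FormatContext>
    auto format(const Point & p, FormatContext & ctx) const
    {
        return fmt::format_to(ctx.out(), "({}, {})", p.x, p.y);
    }
};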
View File

@ -51,7 +51,7 @@ public:
std::string getName() const override { return "Nullable(" + nested_column->getName() + ")"; }
TypeIndex getDataType() const override { return TypeIndex::Nullable; }
MutableColumnPtr cloneResized(size_t size) const override;
size_t size() const override { return nested_column->size(); }
size_t size() const override { return assert_cast<const ColumnUInt8 &>(*null_map).size(); }
bool isNullAt(size_t n) const override { return assert_cast<const ColumnUInt8 &>(*null_map).getData()[n] != 0;}
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;

View File

@ -23,15 +23,10 @@ struct ConstantFilterDescription
struct IFilterDescription
{
/// has_one can be pre-compute during creating the filter description in some cases
Int64 has_one = -1;
virtual ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const = 0;
virtual size_t countBytesInFilter() const = 0;
virtual ~IFilterDescription() = default;
bool hasOne() { return has_one >= 0 ? has_one : hasOneImpl();}
protected:
/// Calculate if filter has a non-zero from the filter values, may update has_one
virtual bool hasOneImpl() = 0;
};
/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8).
@ -45,7 +40,6 @@ struct FilterDescription final : public IFilterDescription
ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const override { return column.filter(*data, result_size_hint); }
size_t countBytesInFilter() const override { return DB::countBytesInFilter(*data); }
protected:
bool hasOneImpl() override { return data ? (has_one = !memoryIsZero(data->data(), 0, data->size())) : false; }
};
struct SparseFilterDescription final : public IFilterDescription
@ -56,7 +50,6 @@ struct SparseFilterDescription final : public IFilterDescription
ColumnPtr filter(const IColumn & column, ssize_t) const override { return column.index(*filter_indices, 0); }
size_t countBytesInFilter() const override { return filter_indices->size(); }
protected:
bool hasOneImpl() override { return filter_indices && !filter_indices->empty(); }
};
struct ColumnWithTypeAndName;

View File

@ -415,6 +415,15 @@ Value saveAllArenasMetric(AsynchronousMetricValues & values,
fmt::format("jemalloc.arenas.all.{}", metric_name));
}
template<typename Value>
Value saveJemallocProf(AsynchronousMetricValues & values,
const std::string & metric_name)
{
return saveJemallocMetricImpl<Value>(values,
fmt::format("prof.{}", metric_name),
fmt::format("jemalloc.prof.{}", metric_name));
}
}
#endif
@ -607,6 +616,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
saveJemallocMetric<size_t>(new_values, "background_thread.num_threads");
saveJemallocMetric<uint64_t>(new_values, "background_thread.num_runs");
saveJemallocMetric<uint64_t>(new_values, "background_thread.run_intervals");
saveJemallocProf<size_t>(new_values, "active");
saveAllArenasMetric<size_t>(new_values, "pactive");
[[maybe_unused]] size_t je_malloc_pdirty = saveAllArenasMetric<size_t>(new_values, "pdirty");
[[maybe_unused]] size_t je_malloc_pmuzzy = saveAllArenasMetric<size_t>(new_values, "pmuzzy");

View File

@ -85,9 +85,18 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_)
/// Write information about current server instance to the file.
WriteBufferFromFileDescriptor out(fd, 1024);
fill(out);
/// Finalize here to avoid throwing exceptions in destructor.
out.finalize();
try
{
fill(out);
/// Finalize here to avoid throwing exceptions in destructor.
out.finalize();
}
catch (...)
{
/// Finalize in case of exception to avoid throwing exceptions in destructor
out.finalize();
throw;
}
}
catch (...)
{

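The change above is an instance of the usual write-buffer discipline: finalize() runs explicitly on every path, including the exception path, so the buffer's destructor never has to flush and therefore never throws. A toy, self-contained version of the pattern (ToyBuffer and fillAndFinalize are stand-ins, not the real WriteBuffer classes):

#include <functional>

// Toy stand-in for a write buffer whose destructor must not throw.
struct ToyBuffer
{
    bool finalized = false;
    void finalize() { finalized = true; }   // the real class flushes and closes here
    ~ToyBuffer() { /* noexcept by design: relies on finalize() having already run */ }
};

void fillAndFinalize(ToyBuffer & out, const std::function<void(ToyBuffer &)> & fill)
{
    try
    {
        fill(out);
        out.finalize();      // success path
    }
    catch (...)
    {
        out.finalize();      // exception path: best-effort finalize, then rethrow
        throw;
    }
}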
View File

@ -41,21 +41,9 @@ Throttler::Throttler(size_t max_speed_, size_t limit_, const char * limit_exceed
UInt64 Throttler::add(size_t amount)
{
// Values obtained under lock to be checked after release
size_t count_value;
double tokens_value;
{
std::lock_guard lock(mutex);
auto now = clock_gettime_ns_adjusted(prev_ns);
if (max_speed)
{
double delta_seconds = prev_ns ? static_cast<double>(now - prev_ns) / NS : 0;
tokens = std::min<double>(tokens + max_speed * delta_seconds - amount, max_burst);
}
count += amount;
count_value = count;
tokens_value = tokens;
prev_ns = now;
}
size_t count_value = 0;
double tokens_value = 0.0;
addImpl(amount, count_value, tokens_value);
if (limit && count_value > limit)
throw Exception::createDeprecated(limit_exceeded_exception_message + std::string(" Maximum: ") + toString(limit), ErrorCodes::LIMIT_EXCEEDED);
@ -77,6 +65,21 @@ UInt64 Throttler::add(size_t amount)
return static_cast<UInt64>(sleep_time_ns);
}
void Throttler::addImpl(size_t amount, size_t & count_value, double & tokens_value)
{
std::lock_guard lock(mutex);
auto now = clock_gettime_ns_adjusted(prev_ns);
if (max_speed)
{
double delta_seconds = prev_ns ? static_cast<double>(now - prev_ns) / NS : 0;
tokens = std::min<double>(tokens + max_speed * delta_seconds - amount, max_burst);
}
count += amount;
count_value = count;
tokens_value = tokens;
prev_ns = now;
}
void Throttler::reset()
{
std::lock_guard lock(mutex);
@ -98,4 +101,14 @@ bool Throttler::isThrottling() const
return false;
}
Int64 Throttler::getAvailable()
{
// To update bucket state and receive the current number of tokens in a thread-safe way
size_t count_value = 0;
double tokens_value = 0.0;
addImpl(0, count_value, tokens_value);
return static_cast<Int64>(tokens_value);
}
}

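addImpl() above is standard token-bucket accounting: the bucket refills at max_speed tokens per second up to max_burst, each add() withdraws amount, and getAvailable() simply calls addImpl(0, ...) to refresh and read the balance. A compact standalone version of the same arithmetic (assumed names, not the Throttler class):

#include <algorithm>
#include <chrono>
#include <mutex>

class TokenBucket
{
public:
    TokenBucket(double max_speed_, double max_burst_)
        : max_speed(max_speed_), max_burst(max_burst_), tokens(max_burst_) {}

    // Returns the token balance after accounting for `amount`; a negative value means over budget.
    double add(double amount)
    {
        std::lock_guard lock(mutex);
        auto now = std::chrono::steady_clock::now();
        double delta_seconds = std::chrono::duration<double>(now - prev).count();
        prev = now;
        tokens = std::min(tokens + max_speed * delta_seconds - amount, max_burst);
        return tokens;
    }

    // Like Throttler::getAvailable(): refresh the bucket without consuming anything.
    double available() { return add(0.0); }

private:
    const double max_speed;   // tokens per second
    const double max_burst;   // bucket capacity
    double tokens;
    std::chrono::steady_clock::time_point prev = std::chrono::steady_clock::now();
    std::mutex mutex;
};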
View File

@ -57,7 +57,13 @@ public:
/// Is throttler already accumulated some sleep time and throttling.
bool isThrottling() const;
Int64 getAvailable();
UInt64 getMaxSpeed() const { return static_cast<UInt64>(max_speed); }
UInt64 getMaxBurst() const { return static_cast<UInt64>(max_burst); }
private:
void addImpl(size_t amount, size_t & count_value, double & tokens_value);
size_t count{0};
const size_t max_speed{0}; /// in tokens per second.
const size_t max_burst{0}; /// in tokens.

View File

@ -108,7 +108,7 @@ struct fmt::formatter<DB::TransactionID>
}
template<typename FormatContext>
auto format(const DB::TransactionID & tid, FormatContext & context)
auto format(const DB::TransactionID & tid, FormatContext & context) const
{
return fmt::format_to(context.out(), "({}, {}, {})", tid.start_csn, tid.local_tid, tid.host_id);
}

View File

@ -647,7 +647,7 @@ public:
template <> struct fmt::formatter<Coordination::Error> : fmt::formatter<std::string_view>
{
constexpr auto format(Coordination::Error code, auto & ctx)
constexpr auto format(Coordination::Error code, auto & ctx) const
{
return formatter<string_view>::format(Coordination::errorMessage(code), ctx);
}

View File

@ -49,7 +49,7 @@ struct fmt::formatter<ReadableSize>
}
template <typename FormatContext>
auto format(const ReadableSize & size, FormatContext & ctx)
auto format(const ReadableSize & size, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}", formatReadableSizeWithBinarySuffix(size.value));
}

View File

@ -18,6 +18,7 @@
#include <IO/S3/Client.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <Interpreters/Context.h>
#include <Common/Macros.h>
#include <aws/core/auth/AWSCredentials.h>
@ -64,7 +65,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
return;
}
auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config);
const auto & settings = Context::getGlobalContextInstance()->getSettingsRef();
auto auth_settings = S3::AuthSettings(config, settings, config_prefix);
String endpoint = macros->expand(config.getString(config_prefix + ".endpoint"));
auto new_uri = S3::URI{endpoint};
@ -118,10 +120,10 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
std::move(headers),
S3::CredentialsConfiguration
{
auth_settings.use_environment_credentials.value_or(true),
auth_settings.use_insecure_imds_request.value_or(false),
auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS),
auth_settings.no_sign_request.value_or(false),
auth_settings.use_environment_credentials,
auth_settings.use_insecure_imds_request,
auth_settings.expiration_window_seconds,
auth_settings.no_sign_request,
},
credentials.GetSessionToken());
@ -154,7 +156,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
if (s3_client == nullptr)
return;
S3Settings::RequestSettings request_settings_1;
S3::RequestSettings request_settings_1;
const auto create_writer = [&](const auto & key)
{
@ -197,7 +199,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
lock_writer.finalize();
// We read back the written UUID, if it's the same we can upload the file
S3Settings::RequestSettings request_settings_2;
S3::RequestSettings request_settings_2;
request_settings_2.max_single_read_retries = 1;
ReadBufferFromS3 lock_reader
{

View File

@ -609,7 +609,10 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
uncommitted_auth.pop_front();
if (uncommitted_auth.empty())
session_and_auth.erase(add_auth->session_id);
}
else if (auto * close_session = std::get_if<CloseSessionDelta>(&front_delta.operation))
{
closed_sessions.erase(close_session->session_id);
}
deltas.pop_front();
@ -682,6 +685,10 @@ void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid)
session_and_auth.erase(add_auth->session_id);
}
}
else if (auto * close_session = std::get_if<CloseSessionDelta>(&delta_it->operation))
{
closed_sessions.erase(close_session->session_id);
}
}
if (delta_it == deltas.rend())
@ -878,6 +885,10 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid)
session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id));
return Coordination::Error::ZOK;
}
else if constexpr (std::same_as<DeltaType, KeeperStorage::CloseSessionDelta>)
{
return Coordination::Error::ZOK;
}
else
{
// shouldn't be called in any process functions
@ -2366,12 +2377,15 @@ void KeeperStorage::preprocessRequest(
ephemerals.erase(session_ephemerals);
}
new_deltas.emplace_back(transaction.zxid, CloseSessionDelta{session_id});
uncommitted_state.closed_sessions.insert(session_id);
new_digest = calculateNodesDigest(new_digest, new_deltas);
return;
}
if (check_acl && !request_processor->checkAuth(*this, session_id, false))
if ((check_acl && !request_processor->checkAuth(*this, session_id, false)) ||
uncommitted_state.closed_sessions.contains(session_id)) // Is session closed but not committed yet
{
uncommitted_state.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH);
return;

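The new closed_sessions set only bridges the gap between pre-commit and commit: once a session close is pre-committed, any further request on that session is answered with ZNOAUTH, and the entry is dropped again when the close is either committed or rolled back. A toy sketch of that bookkeeping (not the KeeperStorage API):

#include <cassert>
#include <cstdint>
#include <unordered_set>

struct UncommittedCloses
{
    std::unordered_set<int64_t> closed_sessions;

    void precommitClose(int64_t session_id) { closed_sessions.insert(session_id); }
    bool isRejected(int64_t session_id) const { return closed_sessions.contains(session_id); }
    void commitOrRollbackClose(int64_t session_id) { closed_sessions.erase(session_id); }
};

int main()
{
    UncommittedCloses state;
    state.precommitClose(1);
    assert(state.isRejected(1));         // a create after the pre-committed close is refused
    state.commitOrRollbackClose(1);
    assert(!state.isRejected(1));        // after rollback the session can act again
}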
View File

@ -314,8 +314,13 @@ public:
AuthID auth_id;
};
struct CloseSessionDelta
{
int64_t session_id;
};
using Operation = std::
variant<CreateNodeDelta, RemoveNodeDelta, UpdateNodeDelta, SetACLDelta, AddAuthDelta, ErrorDelta, SubDeltaEnd, FailedMultiDelta>;
variant<CreateNodeDelta, RemoveNodeDelta, UpdateNodeDelta, SetACLDelta, AddAuthDelta, ErrorDelta, SubDeltaEnd, FailedMultiDelta, CloseSessionDelta>;
struct Delta
{
@ -351,6 +356,7 @@ public:
std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const;
std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth;
std::unordered_set<int64_t> closed_sessions;
struct UncommittedNode
{

View File

@ -57,7 +57,7 @@ using ClusterUpdateActions = std::vector<ClusterUpdateAction>;
template <>
struct fmt::formatter<DB::RaftServerConfig> : fmt::formatter<string_view>
{
constexpr auto format(const DB::RaftServerConfig & server, format_context & ctx)
constexpr auto format(const DB::RaftServerConfig & server, format_context & ctx) const
{
return fmt::format_to(
ctx.out(), "server.{}={};{};{}", server.id, server.endpoint, server.learner ? "learner" : "participant", server.priority);
@ -67,7 +67,7 @@ struct fmt::formatter<DB::RaftServerConfig> : fmt::formatter<string_view>
template <>
struct fmt::formatter<DB::ClusterUpdateAction> : fmt::formatter<string_view>
{
constexpr auto format(const DB::ClusterUpdateAction & action, format_context & ctx)
constexpr auto format(const DB::ClusterUpdateAction & action, format_context & ctx) const
{
if (const auto * add = std::get_if<DB::AddRaftServer>(&action))
return fmt::format_to(ctx.out(), "(Add server {})", add->id);

View File

@ -5,7 +5,7 @@
#include <Common/ThreadPool.h>
#include <Common/callOnce.h>
#include <Disks/IO/IOUringReader.h>
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <Disks/IO/getIOUringReader.h>
#include <Core/ServerSettings.h>
@ -146,7 +146,7 @@ struct ContextSharedPart : boost::noncopyable
mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage
std::optional<S3SettingsByEndpoint> storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
@ -455,14 +455,14 @@ std::shared_ptr<zkutil::ZooKeeper> Context::getZooKeeper() const
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper");
}
const StorageS3Settings & Context::getStorageS3Settings() const
const S3SettingsByEndpoint & Context::getStorageS3Settings() const
{
std::lock_guard lock(shared->mutex);
if (!shared->storage_s3_settings)
{
const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config();
shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef());
shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef());
}
return *shared->storage_s3_settings;

View File

@ -37,7 +37,7 @@ class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class BlobStorageLog;
class IOUringReader;
class StorageS3Settings;
class S3SettingsByEndpoint;
/// A small class which owns ContextShared.
/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete.
@ -130,7 +130,8 @@ public:
enum class ApplicationType : uint8_t
{
KEEPER
KEEPER,
SERVER,
};
void setApplicationType(ApplicationType) {}
@ -163,7 +164,7 @@ public:
zkutil::ZooKeeperPtr getZooKeeper() const;
const StorageS3Settings & getStorageS3Settings() const;
const S3SettingsByEndpoint & getStorageS3Settings() const;
const String & getUserName() const { static std::string user; return user; }

View File

@ -2019,6 +2019,67 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte
EXPECT_EQ(acls[0].permissions, 31);
}
TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted)
{
using namespace Coordination;
using namespace DB;
ChangelogDirTest snapshots("./snapshots");
setSnapshotDirectory("./snapshots");
ResponsesQueue queue(std::numeric_limits<size_t>::max());
SnapshotsQueue snapshots_queue{1};
int64_t session_id = 1;
size_t term = 0;
auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, keeper_context, nullptr);
state_machine->init();
auto & storage = state_machine->getStorageUnsafe();
const auto & uncommitted_state = storage.uncommitted_state;
// Create first node for the session
String node_path_1 = "/node_1";
std::shared_ptr<ZooKeeperCreateRequest> create_req_1 = std::make_shared<ZooKeeperCreateRequest>();
create_req_1->path = node_path_1;
auto create_entry_1 = getLogEntryFromZKRequest(term, session_id, state_machine->getNextZxid(), create_req_1);
state_machine->pre_commit(1, create_entry_1->get_buf());
EXPECT_TRUE(uncommitted_state.nodes.contains(node_path_1));
state_machine->commit(1, create_entry_1->get_buf());
EXPECT_TRUE(storage.container.contains(node_path_1));
// Close session
std::shared_ptr<ZooKeeperCloseRequest> close_req = std::make_shared<ZooKeeperCloseRequest>();
auto close_entry = getLogEntryFromZKRequest(term, session_id, state_machine->getNextZxid(), close_req);
// Pre-commit close session
state_machine->pre_commit(2, close_entry->get_buf());
// Try to create second node after close session is pre-committed
String node_path_2 = "/node_2";
std::shared_ptr<ZooKeeperCreateRequest> create_req_2 = std::make_shared<ZooKeeperCreateRequest>();
create_req_2->path = node_path_2;
auto create_entry_2 = getLogEntryFromZKRequest(term, session_id, state_machine->getNextZxid(), create_req_2);
// Pre-commit creating second node
state_machine->pre_commit(3, create_entry_2->get_buf());
// Second node wasn't created
EXPECT_FALSE(uncommitted_state.nodes.contains(node_path_2));
// Rollback pre-committed closing session
state_machine->rollback(3, create_entry_2->get_buf());
state_machine->rollback(2, close_entry->get_buf());
// Pre-commit creating second node
state_machine->pre_commit(2, create_entry_2->get_buf());
// Now second node was created
EXPECT_TRUE(uncommitted_state.nodes.contains(node_path_2));
state_machine->commit(2, create_entry_2->get_buf());
EXPECT_TRUE(storage.container.contains(node_path_1));
EXPECT_TRUE(storage.container.contains(node_path_2));
}
TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted)
{
using namespace Coordination;

View File

@ -108,6 +108,7 @@ public:
public:
const String & getName() const;
Field getValue() const;
void setValue(const Field & value);
Field getDefaultValue() const;
String getValueString() const;
String getDefaultValueString() const;
@ -122,10 +123,10 @@ public:
private:
friend class BaseSettings;
const BaseSettings * settings;
BaseSettings * settings;
const typename Traits::Accessor * accessor;
size_t index;
std::conditional_t<Traits::allow_custom_settings, const CustomSettingMap::mapped_type*, boost::blank> custom_setting;
std::conditional_t<Traits::allow_custom_settings, CustomSettingMap::mapped_type*, boost::blank> custom_setting;
};
enum SkipFlags
@ -144,35 +145,50 @@ public:
Iterator & operator++();
Iterator operator++(int); /// NOLINT
const SettingFieldRef & operator *() const { return field_ref; }
SettingFieldRef & operator *() { return field_ref; }
bool operator ==(const Iterator & other) const;
bool operator !=(const Iterator & other) const { return !(*this == other); }
private:
friend class BaseSettings;
Iterator(const BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_);
Iterator(BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_);
void doSkip();
void setPointerToCustomSetting();
SettingFieldRef field_ref;
std::conditional_t<Traits::allow_custom_settings, CustomSettingMap::const_iterator, boost::blank> custom_settings_iterator;
std::conditional_t<Traits::allow_custom_settings, CustomSettingMap::iterator, boost::blank> custom_settings_iterator;
SkipFlags skip_flags;
};
class Range
{
public:
Range(const BaseSettings & settings_, SkipFlags skip_flags_) : settings(settings_), accessor(Traits::Accessor::instance()), skip_flags(skip_flags_) {}
Range(BaseSettings & settings_, SkipFlags skip_flags_) : settings(settings_), accessor(Traits::Accessor::instance()), skip_flags(skip_flags_) {}
Iterator begin() const { return Iterator(settings, accessor, skip_flags); }
Iterator end() const { return Iterator(settings, accessor, SKIP_ALL); }
private:
const BaseSettings & settings;
BaseSettings & settings;
const typename Traits::Accessor & accessor;
SkipFlags skip_flags;
};
Range all(SkipFlags skip_flags = SKIP_NONE) const { return Range{*this, skip_flags}; }
class MutableRange
{
public:
MutableRange(BaseSettings & settings_, SkipFlags skip_flags_) : settings(settings_), accessor(Traits::Accessor::instance()), skip_flags(skip_flags_) {}
Iterator begin() { return Iterator(settings, accessor, skip_flags); }
Iterator end() { return Iterator(settings, accessor, SKIP_ALL); }
private:
BaseSettings & settings;
const typename Traits::Accessor & accessor;
SkipFlags skip_flags;
};
Range all(SkipFlags skip_flags = SKIP_NONE) const { return Range{const_cast<BaseSettings<Traits> &>(*this), skip_flags}; }
MutableRange allMutable(SkipFlags skip_flags = SKIP_NONE) { return MutableRange{*this, skip_flags}; }
Range allChanged() const { return all(SKIP_UNCHANGED); }
Range allUnchanged() const { return all(SKIP_CHANGED); }
Range allBuiltin() const { return all(SKIP_CUSTOM); }
@ -608,7 +624,7 @@ const SettingFieldCustom * BaseSettings<TTraits>::tryGetCustomSetting(std::strin
}
template <typename TTraits>
BaseSettings<TTraits>::Iterator::Iterator(const BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_)
BaseSettings<TTraits>::Iterator::Iterator(BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_)
: skip_flags(skip_flags_)
{
field_ref.settings = &settings_;
@ -741,6 +757,18 @@ Field BaseSettings<TTraits>::SettingFieldRef::getValue() const
return accessor->getValue(*settings, index);
}
template <typename TTraits>
void BaseSettings<TTraits>::SettingFieldRef::setValue(const Field & value)
{
if constexpr (Traits::allow_custom_settings)
{
if (custom_setting)
custom_setting->second = value;
}
else
accessor->setValue(*settings, index, value);
}
template <typename TTraits>
Field BaseSettings<TTraits>::SettingFieldRef::getDefaultValue() const
{

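The iterator plumbing switches from const to non-const pointers so that the new SettingFieldRef::setValue() can actually write through the reference handed out by allMutable(). A toy, self-contained analogue of that design decision (ToySettings is invented; it is not the BaseSettings machinery):

#include <array>
#include <cstddef>

struct ToySettings
{
    std::array<int, 3> values{};

    struct FieldRef
    {
        ToySettings * settings = nullptr;   // non-const: needed so set() can write back
        size_t index = 0;
        int get() const { return settings->values[index]; }
        void set(int v) { settings->values[index] = v; }
    };

    struct Iterator
    {
        FieldRef ref;
        FieldRef & operator*() { return ref; }
        Iterator & operator++() { ++ref.index; return *this; }
        bool operator!=(const Iterator & other) const { return ref.index != other.ref.index; }
    };

    Iterator begin() { return Iterator{FieldRef{this, 0}}; }
    Iterator end() { return Iterator{FieldRef{this, values.size()}}; }
};

int main()
{
    ToySettings settings;
    for (auto & field : settings)   // mirrors allMutable(): every field can be written in place
        field.set(42);
    return settings.values[0] == 42 ? 0 : 1;
}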
View File

@ -1038,7 +1038,7 @@ struct fmt::formatter<DB::Field>
}
template <typename FormatContext>
auto format(const DB::Field & x, FormatContext & ctx)
auto format(const DB::Field & x, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}", toString(x));
}

View File

@ -125,7 +125,7 @@ namespace fmt
}
template <typename FormatContext>
auto format(const DB::QualifiedTableName & name, FormatContext & ctx)
auto format(const DB::QualifiedTableName & name, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}.{}", DB::backQuoteIfNeed(name.database), DB::backQuoteIfNeed(name.table));
}

View File

@ -148,6 +148,8 @@ namespace DB
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -1,4 +1,5 @@
#include <Core/ServerUUID.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadHelpers.h>
@ -11,6 +12,16 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_CREATE_FILE;
extern const int LOGICAL_ERROR;
}
UUID ServerUUID::get()
{
if (server_uuid == UUIDHelpers::Nil &&
(Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::SERVER ||
Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::KEEPER))
throw Exception(ErrorCodes::LOGICAL_ERROR, "ServerUUID is not initialized yet");
return server_uuid;
}
void ServerUUID::load(const fs::path & server_uuid_file, Poco::Logger * log)
@ -57,4 +68,9 @@ UUID loadServerUUID(const fs::path & server_uuid_file, Poco::Logger * log)
}
}
void ServerUUID::setRandomForUnitTests()
{
server_uuid = UUIDHelpers::generateV4();
}
}

View File

@ -15,10 +15,12 @@ class ServerUUID
public:
/// Returns persistent UUID of current clickhouse-server or clickhouse-keeper instance.
static UUID get() { return server_uuid; }
static UUID get();
/// Loads server UUID from file or creates new one. Should be called on daemon startup.
static void load(const fs::path & server_uuid_file, Poco::Logger * log);
static void setRandomForUnitTests();
};
UUID loadServerUUID(const fs::path & server_uuid_file, Poco::Logger * log);

View File

@ -5,6 +5,7 @@
#include <Core/SettingsEnums.h>
#include <Core/Defines.h>
#include <IO/ReadSettings.h>
#include <IO/S3Defines.h>
#include <base/unit.h>
@ -78,34 +79,36 @@ class IColumn;
M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \
M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \
M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \
M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations do not support variable size parts).", 0) \
M(UInt64, s3_strict_upload_part_size, S3::DEFAULT_STRICT_UPLOAD_PART_SIZE, "The exact size of part to upload during multipart upload to S3 (some implementations do not support variable size parts).", 0) \
M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \
M(UInt64, azure_max_blocks_in_multipart_upload, 50000, "Maximum number of blocks in multipart upload for Azure.", 0) \
M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_min_upload_part_size, S3::DEFAULT_MIN_UPLOAD_PART_SIZE, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_upload_part_size, S3::DEFAULT_MAX_UPLOAD_PART_SIZE, "The maximum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \
M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \
M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_upload_part_size_multiply_factor, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_part_number, S3::DEFAULT_MAX_PART_NUMBER, "Maximum part number for s3 upload part.", 0) \
M(UInt64, s3_max_single_operation_copy_size, S3::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE, "Maximum size for a single copy operation in s3", 0) \
M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \
M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of concurrently loaded parts in a multipart upload request. 0 means unlimited.", 0) \
M(UInt64, s3_max_inflight_parts_for_one_file, S3::DEFAULT_MAX_INFLIGHT_PARTS_FOR_ONE_FILE, "The maximum number of concurrently loaded parts in a multipart upload request. 0 means unlimited.", 0) \
M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of concurrently loaded parts in a multipart upload request. 0 means unlimited.", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, s3_max_single_part_upload_size, S3::DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \
M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, s3_max_single_read_retries, S3::DEFAULT_MAX_SINGLE_READ_TRIES, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \
M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \
M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
M(UInt64, s3_max_unexpected_write_error_retries, S3::DEFAULT_MAX_UNEXPECTED_WRITE_ERROR_RETRIES, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \
M(UInt64, s3_max_redirects, S3::DEFAULT_MAX_REDIRECTS, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, S3::DEFAULT_MAX_CONNECTIONS, "The maximum number of connections per server.", 0) \
M(UInt64, s3_max_get_rps, 0, "Limit on S3 GET request per second rate before throttling. Zero means unlimited.", 0) \
M(UInt64, s3_max_get_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps`", 0) \
M(UInt64, s3_max_put_rps, 0, "Limit on S3 PUT request per second rate before throttling. Zero means unlimited.", 0) \
M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps`", 0) \
M(UInt64, s3_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \
M(Bool, s3_use_adaptive_timeouts, true, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \
M(UInt64, s3_list_object_keys_size, S3::DEFAULT_LIST_OBJECT_KEYS_SIZE, "Maximum number of files that could be returned in batch by ListObject request", 0) \
M(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \
M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \
M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \
M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \
@ -122,10 +125,10 @@ class IColumn;
M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \
M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \
M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \
M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \
M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, S3::DEFAULT_REQUEST_TIMEOUT_MS, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
M(UInt64, s3_connect_timeout_ms, S3::DEFAULT_CONNECT_TIMEOUT_MS, "Connection timeout for host from s3 disks.", 0) \
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
@ -398,7 +401,7 @@ class IColumn;
M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \
M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather than `t1.b = t2.b`).", 0) \
M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \
M(Bool, enable_vertical_final, false, "Not recommended. If enabled, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \
M(Bool, enable_vertical_final, true, "If enabled, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \
\
\
/** Limits during query execution are part of the settings. \
@ -1009,6 +1012,8 @@ class IColumn;
M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \
M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \
M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \
M(Bool, output_format_csv_serialize_tuple_into_separate_columns, true, "If it is set to true, then Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost)", 0) \
M(Bool, input_format_csv_deserialize_separate_columns_into_tuple, true, "If it is set to true, then separate columns written in CSV format can be deserialized into a Tuple column.", 0) \
M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
M(Bool, input_format_csv_allow_cr_end_of_line, false, "If it is set true, \\r will be allowed at end of line not followed by \\n", 0) \
M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \
@ -1047,6 +1052,7 @@ class IColumn;
M(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, "The maximum bytes of data to read for automatic schema inference", 0) \
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
M(Bool, input_format_csv_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference in CSV format", 0) \
M(Bool, input_format_csv_try_infer_strings_from_quoted_tuples, true, "Interpret quoted tuples in the input data as a value of type String.", 0) \
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
@ -1138,7 +1144,9 @@ class IColumn;
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \
M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \
M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \
M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \
M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \
M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \
M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \
M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \

View File

@ -96,6 +96,8 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
{"s3_max_part_number", 10000, 10000, "Maximum part number for s3 upload part"},
{"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"},
{"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."},
{"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."},
{"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"},
@ -103,8 +105,14 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."},
{"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."},
{"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."},
{"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"},
{"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"},
{"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"},
{"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."},
{"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."},
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},
}},
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
@ -172,6 +180,9 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."},
{"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."},
{"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."},
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},
}},
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},

View File

@ -629,7 +629,7 @@ struct fmt::formatter<DB::DataTypePtr>
}
template <typename FormatContext>
auto format(const DB::DataTypePtr & type, FormatContext & ctx)
auto format(const DB::DataTypePtr & type, FormatContext & ctx) const
{
return fmt::format_to(ctx.out(), "{}", type->getName());
}

View File

@ -531,26 +531,98 @@ void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num
void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
WriteBufferFromOwnString wb;
serializeText(column, row_num, wb, settings);
writeCSV(wb.str(), ostr);
if (settings.csv.serialize_tuple_into_separate_columns)
{
for (size_t i = 0; i < elems.size(); ++i)
{
if (i != 0)
writeChar(settings.csv.tuple_delimiter, ostr);
elems[i]->serializeTextCSV(extractElementColumn(column, i), row_num, ostr, settings);
}
}
else
{
WriteBufferFromOwnString wb;
serializeText(column, row_num, wb, settings);
writeCSV(wb.str(), ostr);
}
}
void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
deserializeText(column, rb, settings, true);
if (settings.csv.deserialize_separate_columns_into_tuple)
{
addElementSafe<void>(elems.size(), column, [&]
{
const size_t size = elems.size();
for (size_t i = 0; i < size; ++i)
{
if (i != 0)
{
skipWhitespaceIfAny(istr);
assertChar(settings.csv.tuple_delimiter, istr);
skipWhitespaceIfAny(istr);
}
auto & element_column = extractElementColumn(column, i);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]);
else
elems[i]->deserializeTextCSV(element_column, istr, settings);
}
return true;
});
}
else
{
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
deserializeText(column, rb, settings, true);
}
}
bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
if (!tryReadCSV(s, istr, settings.csv))
return false;
ReadBufferFromString rb(s);
return tryDeserializeText(column, rb, settings, true);
if (settings.csv.deserialize_separate_columns_into_tuple)
{
return addElementSafe<bool>(elems.size(), column, [&]
{
const size_t size = elems.size();
for (size_t i = 0; i < size; ++i)
{
if (i != 0)
{
skipWhitespaceIfAny(istr);
if (!checkChar(settings.csv.tuple_delimiter, istr))
return false;
skipWhitespaceIfAny(istr);
}
auto & element_column = extractElementColumn(column, i);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
{
if (!SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]))
return false;
}
else
{
if (!elems[i]->tryDeserializeTextCSV(element_column, istr, settings))
return false;
}
}
return true;
});
}
else
{
String s;
if (!tryReadCSV(s, istr, settings.csv))
return false;
ReadBufferFromString rb(s);
return tryDeserializeText(column, rb, settings, true);
}
}
struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState

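The effect of the new CSV settings is easiest to see on a concrete row. For a column of type Tuple(UInt32, String) holding the value (1, 'a'), the two modes produce different CSV text; the snippet below is a plain C++ illustration of the two shapes (it does not call ClickHouse serialization code):

#include <iostream>
#include <string>

int main()
{
    // serialize_tuple_into_separate_columns = true (new default): tuple elements are
    // written as individual CSV fields, so the nesting is flattened.
    std::string separate_columns = "1,a";

    // Legacy behaviour (setting disabled): the whole tuple is one quoted CSV field.
    std::string single_quoted_field = "\"(1,'a')\"";

    std::cout << separate_columns << '\n' << single_quoted_field << '\n';
    return 0;
}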
View File

@ -146,7 +146,7 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
}
/// If we started to read a new column, reinitialize variant column in deserialization state.
if (!variant_element_state->variant || result_column->empty())
if (!variant_element_state->variant || mutable_column->empty())
{
variant_element_state->variant = mutable_column->cloneEmpty();

View File

@ -175,8 +175,7 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
if (!mask_filled)
(*default_mask)[requested_key_index] = 1;
Field value{};
result_column->insert(value);
result_column->insertDefault();
}
else
{

View File

@ -172,6 +172,14 @@ void checkS3Capabilities(
}
}
static std::string getEndpoint(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const ContextPtr & context)
{
return context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
}
void registerS3ObjectStorage(ObjectStorageFactory & factory)
{
static constexpr auto disk_type = "s3";
@ -185,8 +193,9 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory)
{
auto uri = getS3URI(config, config_prefix, context);
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings, true);
auto endpoint = getEndpoint(config, config_prefix, context);
auto settings = getSettings(config, config_prefix, context, endpoint, /* validate_settings */true);
auto client = getClient(endpoint, *settings, context, /* for_disk_s3 */true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = createObjectStorage<S3ObjectStorage>(
@ -221,8 +230,9 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory)
auto uri = getS3URI(config, config_prefix, context);
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings, true);
auto endpoint = getEndpoint(config, config_prefix, context);
auto settings = getSettings(config, config_prefix, context, endpoint, /* validate_settings */true);
auto client = getClient(endpoint, *settings, context, /* for_disk_s3 */true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<PlainObjectStorage<S3ObjectStorage>>(
@ -255,8 +265,9 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory)
auto uri = getS3URI(config, config_prefix, context);
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings, true);
auto endpoint = getEndpoint(config, config_prefix, context);
auto settings = getSettings(config, config_prefix, context, endpoint, /* validate_settings */true);
auto client = getClient(endpoint, *settings, context, /* for_disk_s3 */true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto metadata_storage_metrics = DB::MetadataStorageMetrics::create<S3ObjectStorage, MetadataStorageType::PlainRewritable>();

View File

@ -168,7 +168,7 @@ private:
bool S3ObjectStorage::exists(const StoredObject & object) const
{
auto settings_ptr = s3_settings.get();
return S3::objectExists(*client.get(), uri.bucket, object.remote_path, {}, settings_ptr->request_settings);
return S3::objectExists(*client.get(), uri.bucket, object.remote_path, {});
}
std::unique_ptr<ReadBufferFromFileBase> S3ObjectStorage::readObjects( /// NOLINT
@ -258,13 +258,15 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
if (mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files");
S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings;
S3::RequestSettings request_settings = s3_settings.get()->request_settings;
/// NOTE: For background operations settings are not propagated from session or query. They are taken from
/// default user's .xml config. It's obscure and unclear behavior. For them it's always better
/// to rely on settings from disk.
if (auto query_context = CurrentThread::getQueryContext(); query_context && !query_context->isBackgroundOperationContext())
if (auto query_context = CurrentThread::getQueryContext();
query_context && !query_context->isBackgroundOperationContext())
{
request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef());
const auto & settings = query_context->getSettingsRef();
request_settings.updateFromSettings(settings, /* if_changed */true, settings.s3_validate_request_settings);
}
ThreadPoolCallbackRunnerUnsafe<void> scheduler;
@ -444,8 +446,7 @@ std::optional<ObjectMetadata> S3ObjectStorage::tryGetObjectMetadata(const std::s
{
auto settings_ptr = s3_settings.get();
auto object_info = S3::getObjectInfo(
*client.get(), uri.bucket, path, {}, settings_ptr->request_settings,
/* with_metadata= */ true, /* throw_on_error= */ false);
*client.get(), uri.bucket, path, {}, /* with_metadata= */ true, /* throw_on_error= */ false);
if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty())
return {};
@ -464,7 +465,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons
S3::ObjectInfo object_info;
try
{
object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true);
object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, /* with_metadata= */ true);
}
catch (DB::Exception & e)
{
@ -493,7 +494,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT
{
auto current_client = dest_s3->client.get();
auto settings_ptr = s3_settings.get();
auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings);
auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {});
auto scheduler = threadPoolCallbackRunnerUnsafe<void>(getThreadPoolWriter(), "S3ObjStor_copy");
try
@ -537,7 +538,7 @@ void S3ObjectStorage::copyObject( // NOLINT
{
auto current_client = client.get();
auto settings_ptr = s3_settings.get();
auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings);
auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {});
auto scheduler = threadPoolCallbackRunnerUnsafe<void>(getThreadPoolWriter(), "S3ObjStor_copy");
copyS3File(
@ -582,19 +583,22 @@ void S3ObjectStorage::applyNewSettings(
ContextPtr context,
const ApplyNewSettingsOptions & options)
{
auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings);
auto settings_from_config = getSettings(config, config_prefix, context, uri.uri_str, context->getSettingsRef().s3_validate_request_settings);
auto modified_settings = std::make_unique<S3ObjectStorageSettings>(*s3_settings.get());
modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings);
modified_settings->request_settings = settings_from_config->request_settings;
modified_settings->auth_settings.updateIfChanged(settings_from_config->auth_settings);
modified_settings->request_settings.updateIfChanged(settings_from_config->request_settings);
if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName()))
modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings);
{
modified_settings->auth_settings.updateIfChanged(endpoint_settings->auth_settings);
modified_settings->request_settings.updateIfChanged(endpoint_settings->request_settings);
}
auto current_settings = s3_settings.get();
if (options.allow_client_change
&& (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3))
{
auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri);
auto new_client = getClient(uri, *modified_settings, context, for_disk_s3);
client.set(std::move(new_client));
}
s3_settings.set(std::move(modified_settings));
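The hunk above replaces updateFrom with updateIfChanged for both the auth and the request settings, so an endpoint-level override only wins when it was explicitly configured. A minimal standalone sketch of that pattern, with every name (SettingField, set, updateIfChanged) assumed for illustration rather than taken from the ClickHouse headers:
#include <string>
#include <utility>

template <typename T>
struct SettingField
{
    T value{};
    bool changed = false;                      // flips to true once the field is explicitly assigned

    void set(T v) { value = std::move(v); changed = true; }

    void updateIfChanged(const SettingField & other)
    {
        if (other.changed)                     // only explicitly configured values override the current one
            set(other.value);
    }
};

int main()
{
    SettingField<std::string> region;          // left at its default
    SettingField<std::string> endpoint_region; // endpoint-level override from config
    endpoint_region.set("eu-west-1");

    region.updateIfChanged(endpoint_region);   // region now carries the override
    return region.value == "eu-west-1" ? 0 : 1;
}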
@ -606,8 +610,9 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
const std::string & config_prefix,
ContextPtr context)
{
auto new_s3_settings = getSettings(config, config_prefix, context);
auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true);
const auto & settings = context->getSettingsRef();
auto new_s3_settings = getSettings(config, config_prefix, context, uri.uri_str, settings.s3_validate_request_settings);
auto new_client = getClient(uri, *new_s3_settings, context, for_disk_s3);
auto new_uri{uri};
new_uri.bucket = new_namespace;

View File

@ -7,7 +7,7 @@
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Disks/ObjectStorages/S3/S3Capabilities.h>
#include <memory>
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <Common/MultiVersion.h>
#include <Common/ObjectStorageKeyGenerator.h>
@ -20,7 +20,7 @@ struct S3ObjectStorageSettings
S3ObjectStorageSettings() = default;
S3ObjectStorageSettings(
const S3Settings::RequestSettings & request_settings_,
const S3::RequestSettings & request_settings_,
const S3::AuthSettings & auth_settings_,
uint64_t min_bytes_for_seek_,
int32_t list_object_keys_size_,
@ -34,7 +34,7 @@ struct S3ObjectStorageSettings
, read_only(read_only_)
{}
S3Settings::RequestSettings request_settings;
S3::RequestSettings request_settings;
S3::AuthSettings auth_settings;
uint64_t min_bytes_for_seek;

View File

@ -6,6 +6,7 @@
#include <Common/StringUtils.h>
#include <Common/logger_useful.h>
#include <Common/Macros.h>
#include <Common/Throttler.h>
#include <Common/ProxyConfigurationResolverProvider.h>
#include <IO/ReadHelpers.h>
@ -18,18 +19,12 @@
#include <IO/S3Common.h>
#include <IO/S3/Credentials.h>
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
#include <Disks/DiskLocal.h>
#include <Common/Macros.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace ErrorCodes
{
extern const int NO_ELEMENTS_IN_CONFIG;
@ -39,11 +34,16 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
const std::string & endpoint,
bool validate_settings)
{
const Settings & settings = context->getSettingsRef();
auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings);
auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config);
const auto & settings = context->getSettingsRef();
auto auth_settings = S3::AuthSettings(config, settings, config_prefix);
auto request_settings = S3::RequestSettings(config, settings, config_prefix, "s3_", validate_settings);
request_settings.proxy_resolver = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
ProxyConfiguration::protocolFromString(S3::URI(endpoint).uri.getScheme()), config_prefix, config);
return std::make_unique<S3ObjectStorageSettings>(
request_settings,
@ -55,38 +55,33 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(
}
std::unique_ptr<S3::Client> getClient(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
const std::string & endpoint,
const S3ObjectStorageSettings & settings,
bool for_disk_s3,
const S3::URI * url_)
ContextPtr context,
bool for_disk_s3)
{
auto url = S3::URI(endpoint);
if (!url.key.ends_with('/'))
url.key.push_back('/');
return getClient(url, settings, context, for_disk_s3);
}
std::unique_ptr<S3::Client> getClient(
const S3::URI & url,
const S3ObjectStorageSettings & settings,
ContextPtr context,
bool for_disk_s3)
{
const Settings & global_settings = context->getGlobalContext()->getSettingsRef();
const Settings & local_settings = context->getSettingsRef();
const auto & auth_settings = settings.auth_settings;
const auto & request_settings = settings.request_settings;
S3::URI url;
if (for_disk_s3)
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
url = S3::URI(endpoint);
if (!url.key.ends_with('/'))
url.key.push_back('/');
}
else
{
if (!url_)
throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed");
url = *url_;
}
const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint);
if (is_s3_express_bucket && !config.has(config_prefix + ".region"))
if (is_s3_express_bucket && auth_settings.region.value.empty())
{
throw Exception(
ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix);
ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"Region should be explicitly specified for directory buckets");
}
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
@ -96,49 +91,40 @@ std::unique_ptr<S3::Client> getClient(
static_cast<int>(global_settings.s3_retry_attempts),
global_settings.enable_s3_requests_logging,
for_disk_s3,
settings.request_settings.get_request_throttler,
settings.request_settings.put_request_throttler,
request_settings.get_request_throttler,
request_settings.put_request_throttler,
url.uri.getScheme());
client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value);
client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", static_cast<unsigned>(request_settings.max_connections));
client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT);
client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS);
client_configuration.connectTimeoutMs = auth_settings.connect_timeout_ms;
client_configuration.requestTimeoutMs = auth_settings.request_timeout_ms;
client_configuration.maxConnections = static_cast<uint32_t>(auth_settings.max_connections);
client_configuration.http_keep_alive_timeout = auth_settings.http_keep_alive_timeout;
client_configuration.http_keep_alive_max_requests = auth_settings.http_keep_alive_max_requests;
client_configuration.endpointOverride = url.endpoint;
client_configuration.s3_use_adaptive_timeouts = config.getBool(
config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts);
client_configuration.s3_use_adaptive_timeouts = auth_settings.use_adaptive_timeouts;
if (for_disk_s3)
if (request_settings.proxy_resolver)
{
/*
* Override proxy configuration for backwards compatibility with old configuration format.
* */
if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config))
{
client_configuration.per_request_configuration
= [proxy_config]() { return proxy_config->resolve(); };
client_configuration.error_report
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
}
client_configuration.per_request_configuration = [=]() { return request_settings.proxy_resolver->resolve(); };
client_configuration.error_report = [=](const auto & request_config) { request_settings.proxy_resolver->errorReport(request_config); };
}
S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config);
S3::ClientSettings client_settings{
.use_virtual_addressing = url.is_virtual_hosted_style,
.disable_checksum = local_settings.s3_disable_checksum,
.gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false),
.is_s3express_bucket = is_s3_express_bucket,
.disable_checksum = auth_settings.disable_checksum,
.gcs_issue_compose_request = auth_settings.gcs_issue_compose_request,
};
auto credentials_configuration = S3::CredentialsConfiguration
{
auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)),
auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)),
auth_settings.use_environment_credentials,
auth_settings.use_insecure_imds_request,
auth_settings.expiration_window_seconds,
auth_settings.no_sign_request,
};
return S3::ClientFactory::instance().create(
@ -147,7 +133,7 @@ std::unique_ptr<S3::Client> getClient(
auth_settings.access_key_id,
auth_settings.secret_access_key,
auth_settings.server_side_encryption_customer_key_base64,
std::move(sse_kms_config),
auth_settings.server_side_encryption_kms_config,
auth_settings.headers,
credentials_configuration,
auth_settings.session_token);

View File

@ -18,15 +18,20 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
bool validate_settings = true);
const std::string & endpoint,
bool validate_settings);
std::unique_ptr<S3::Client> getClient(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
const std::string & endpoint,
const S3ObjectStorageSettings & settings,
bool for_disk_s3,
const S3::URI * url_ = nullptr);
ContextPtr context,
bool for_disk_s3);
std::unique_ptr<S3::Client> getClient(
const S3::URI & url_,
const S3ObjectStorageSettings & settings,
ContextPtr context,
bool for_disk_s3);
}

View File

@ -47,7 +47,7 @@ namespace
auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr {
auto disk = DiskFactory::instance().create(
disk_name, *config, "", context, disks_map, /* attach */attach, /* custom_disk */true);
disk_name, *config, /* config_path */"", context, disks_map, /* attach */attach, /* custom_disk */true);
/// Mark that disk can be used without storage policy.
disk->markDiskAsCustom();
return disk;

View File

@ -303,7 +303,7 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
auto type = tryInferDataTypeForSingleField(data, format_settings);
/// If we couldn't infer any type, or it's a tuple and csv.try_infer_strings_from_quoted_tuples = 1, or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
if (!type || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings))
if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)))
return std::make_shared<DataTypeString>();
return type;

View File

@ -77,6 +77,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file;
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.serialize_tuple_into_separate_columns = settings.output_format_csv_serialize_tuple_into_separate_columns;
format_settings.csv.deserialize_separate_columns_into_tuple = settings.input_format_csv_deserialize_separate_columns_into_tuple;
format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line;
format_settings.csv.allow_cr_end_of_line = settings.input_format_csv_allow_cr_end_of_line;
format_settings.csv.delimiter = settings.format_csv_delimiter;
@ -94,6 +96,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
format_settings.csv.try_infer_numbers_from_strings = settings.input_format_csv_try_infer_numbers_from_strings;
format_settings.csv.try_infer_strings_from_quoted_tuples = settings.input_format_csv_try_infer_strings_from_quoted_tuples;
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
@ -178,6 +181,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.pretty.highlight_digit_groups = settings.output_format_pretty_highlight_digit_groups;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.pretty.output_format_pretty_single_large_number_tip_threshold = settings.output_format_pretty_single_large_number_tip_threshold;
format_settings.pretty.output_format_pretty_display_footer_column_names = settings.output_format_pretty_display_footer_column_names;
format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows = settings.output_format_pretty_display_footer_column_names_min_rows;
format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers;
format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers;
format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference;

View File

@ -153,6 +153,8 @@ struct FormatSettings
char delimiter = ',';
bool allow_single_quotes = true;
bool allow_double_quotes = true;
bool serialize_tuple_into_separate_columns = true;
bool deserialize_separate_columns_into_tuple = true;
bool empty_as_default = false;
bool crlf_end_of_line = false;
bool allow_cr_end_of_line = false;
@ -170,6 +172,7 @@ struct FormatSettings
bool allow_variable_number_of_columns = false;
bool use_default_on_bad_values = false;
bool try_infer_numbers_from_strings = true;
bool try_infer_strings_from_quoted_tuples = true;
} csv{};
struct HiveText
@ -286,6 +289,8 @@ struct FormatSettings
bool output_format_pretty_row_numbers = false;
UInt64 output_format_pretty_single_large_number_tip_threshold = 1'000'000;
UInt64 output_format_pretty_display_footer_column_names = 1;
UInt64 output_format_pretty_display_footer_column_names_min_rows = 50;
enum class Charset : uint8_t
{

View File

@ -12,7 +12,7 @@
# include <Common/MemorySanitizer.h>
# include <cstddef>
# include <span>
# include <string_view>
namespace DB
{
@ -22,36 +22,125 @@ namespace ErrorCodes
extern const int INCORRECT_DATA;
}
enum class Base64Variant : uint8_t
{
Normal,
Url
};
inline std::string preprocessBase64Url(std::string_view src)
{
std::string padded_src;
padded_src.reserve(src.size() + 3);
// Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5
for (auto s : src)
{
switch (s)
{
case '_':
padded_src += '/';
break;
case '-':
padded_src += '+';
break;
default:
padded_src += s;
break;
}
}
/// Insert padding to please aklomp library
size_t remainder = src.size() % 4;
switch (remainder)
{
case 0:
break; // no padding needed
case 1:
padded_src.append("==="); // this case is impossible to occur with valid base64-URL encoded input, however, we'll insert padding anyway
break;
case 2:
padded_src.append("=="); // two bytes padding
break;
default: // remainder == 3
padded_src.append("="); // one byte padding
break;
}
return padded_src;
}
inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len)
{
// Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5
for (size_t i = 0; i < out_len; ++i)
{
switch (dst[i])
{
case '/':
dst[i] = '_';
break;
case '+':
dst[i] = '-';
break;
case '=': // stop when padding is detected
return i;
default:
break;
}
}
return out_len;
}
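For readers without RFC 4648 at hand, here is a minimal standalone sketch, separate from the library-backed code above, of the same section 5 handling: map the URL-safe alphabet back to the standard one and restore the '=' padding that base64url omits, so that a strict decoder accepts the input. The helper name toStandardBase64 is illustrative only, not part of the patch.
#include <iostream>
#include <string>
#include <string_view>

static std::string toStandardBase64(std::string_view url_encoded)
{
    std::string out;
    out.reserve(url_encoded.size() + 3);
    for (char c : url_encoded)
        out += (c == '-') ? '+' : (c == '_') ? '/' : c;   // RFC 4648 section 5 alphabet substitution

    switch (url_encoded.size() % 4)                       // restore the padding base64url drops
    {
        case 2: out += "=="; break;                       // two padding characters
        case 3: out += "=";  break;                       // one padding character
        case 1: out += "==="; break;                      // cannot arise from valid input, padded anyway
        default: break;                                   // already a multiple of four
    }
    return out;
}

int main()
{
    // 30 characters -> remainder 2 -> "==" is appended, matching the table in preprocessBase64Url.
    std::cout << toStandardBase64("aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ") << '\n';
}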
template <Base64Variant variant>
struct Base64Encode
{
static constexpr auto name = "base64Encode";
static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64UrlEncode";
static size_t getBufferSize(size_t string_length, size_t string_count)
{
return ((string_length - string_count) / 3 + string_count) * 4 + string_count;
}
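/// Worked example for the formula above (added for this note, not part of the original comments), assuming
/// string_length includes one terminating zero byte per stored string, as ClickHouse String columns do:
/// a single 10-byte payload arrives as string_length = 11, string_count = 1, and the formula yields
/// ((11 - 1) / 3 + 1) * 4 + 1 = 17 bytes, enough for the ceil(10 / 3) * 4 = 16 base64 characters plus a trailing zero byte.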
static size_t perform(const std::span<const UInt8> src, UInt8 * dst)
static size_t perform(std::string_view src, UInt8 * dst)
{
size_t outlen = 0;
base64_encode(reinterpret_cast<const char *>(src.data()), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
base64_encode(src.data(), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
/// Base64 library is using AVX-512 with some shuffle operations.
/// Memory sanitizer doesn't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle.
__msan_unpoison(dst, outlen);
if constexpr (variant == Base64Variant::Url)
outlen = postprocessBase64Url(dst, outlen);
return outlen;
}
};
template <Base64Variant variant>
struct Base64Decode
{
static constexpr auto name = "base64Decode";
static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64UrlDecode";
static size_t getBufferSize(size_t string_length, size_t string_count)
{
return ((string_length - string_count) / 4 + string_count) * 3 + string_count;
}
static size_t perform(const std::span<const UInt8> src, UInt8 * dst)
static size_t perform(std::string_view src, UInt8 * dst)
{
int rc;
size_t outlen = 0;
int rc = base64_decode(reinterpret_cast<const char *>(src.data()), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
if constexpr (variant == Base64Variant::Url)
{
std::string src_padded = preprocessBase64Url(src);
rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast<char *>(dst), &outlen, 0);
}
else
{
rc = base64_decode(src.data(), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
}
if (rc != 1)
throw Exception(
@ -64,19 +153,29 @@ struct Base64Decode
}
};
template <Base64Variant variant>
struct TryBase64Decode
{
static constexpr auto name = "tryBase64Decode";
static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64UrlDecode";
static size_t getBufferSize(size_t string_length, size_t string_count)
{
return Base64Decode::getBufferSize(string_length, string_count);
return Base64Decode<variant>::getBufferSize(string_length, string_count);
}
static size_t perform(const std::span<const UInt8> src, UInt8 * dst)
static size_t perform(std::string_view src, UInt8 * dst)
{
int rc;
size_t outlen = 0;
int rc = base64_decode(reinterpret_cast<const char *>(src.data()), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
if constexpr (variant == Base64Variant::Url)
{
std::string src_padded = preprocessBase64Url(src);
rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast<char *>(dst), &outlen, 0);
}
else
{
rc = base64_decode(src.data(), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
}
if (rc != 1)
outlen = 0;
@ -139,7 +238,7 @@ private:
auto * dst = dst_chars.data();
auto * dst_pos = dst;
const auto * src = src_chars.data();
const auto * src = reinterpret_cast<const char *>(src_chars.data());
size_t src_offset_prev = 0;
for (size_t row = 0; row < src_row_count; ++row)
@ -147,10 +246,6 @@ private:
const size_t src_length = src_offsets[row] - src_offset_prev - 1;
const size_t outlen = Func::perform({src, src_length}, dst_pos);
/// Base64 library is using AVX-512 with some shuffle operations.
/// Memory sanitizer doesn't understand if there was uninitialized memory in a SIMD register but it was not used in the result of the shuffle.
__msan_unpoison(dst_pos, outlen);
src += src_length + 1;
dst_pos += outlen;
*dst_pos = '\0';
@ -179,16 +274,12 @@ private:
auto * dst = dst_chars.data();
auto * dst_pos = dst;
const auto * src = src_chars.data();
const auto * src = reinterpret_cast<const char *>(src_chars.data());
for (size_t row = 0; row < src_row_count; ++row)
{
const auto outlen = Func::perform({src, src_n}, dst_pos);
/// Base64 library is using AVX-512 with some shuffle operations.
/// Memory sanitizer doesn't understand if there was uninitialized memory in a SIMD register but it was not used in the result of the shuffle.
__msan_unpoison(dst_pos, outlen);
src += src_n;
dst_pos += outlen;
*dst_pos = '\0';

View File

@ -7,7 +7,14 @@ namespace DB
{
REGISTER_FUNCTION(Base64Decode)
{
factory.registerFunction<FunctionBase64Conversion<Base64Decode>>();
FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)";
FunctionDocumentation::Syntax syntax = "base64Decode(encoded)";
FunctionDocumentation::Arguments arguments = {{"encoded", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument.";
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64Decode('Y2xpY2tob3VzZQ==')", "clickhouse"}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<Base64Decode<Base64Variant::Normal>>>({description, syntax, arguments, returned_value, examples, categories});
/// MySQL compatibility alias.
factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive);

View File

@ -7,7 +7,14 @@ namespace DB
{
REGISTER_FUNCTION(Base64Encode)
{
factory.registerFunction<FunctionBase64Conversion<Base64Encode>>();
FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)";
FunctionDocumentation::Syntax syntax = "base64Encode(plaintext)";
FunctionDocumentation::Arguments arguments = {{"plaintext", "String column or constant."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument.";
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64Encode('clickhouse')", "Y2xpY2tob3VzZQ=="}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<Base64Encode<Base64Variant::Normal>>>({description, syntax, arguments, returned_value, examples, categories});
/// MySQL compatibility alias.
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive);

View File

@ -0,0 +1,21 @@
#include <Functions/FunctionBase64Conversion.h>
#if USE_BASE64
#include <Functions/FunctionFactory.h>
namespace DB
{
REGISTER_FUNCTION(Base64UrlDecode)
{
FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
FunctionDocumentation::Syntax syntax = "base64UrlDecode(encodedUrl)";
FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument.";
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<Base64Decode<Base64Variant::Url>>>({description, syntax, arguments, returned_value, examples, categories});
}
}
#endif

View File

@ -0,0 +1,21 @@
#include <Functions/FunctionBase64Conversion.h>
#if USE_BASE64
#include <Functions/FunctionFactory.h>
namespace DB
{
REGISTER_FUNCTION(Base64UrlEncode)
{
FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
FunctionDocumentation::Syntax syntax = "base64UrlEncode(url)";
FunctionDocumentation::Arguments arguments = {{"url", "String column or constant."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument.";
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<Base64Encode<Base64Variant::Url>>>({description, syntax, arguments, returned_value, examples, categories});
}
}
#endif

View File

@ -7,7 +7,14 @@ namespace DB
{
REGISTER_FUNCTION(TryBase64Decode)
{
factory.registerFunction<FunctionBase64Conversion<TryBase64Decode>>();
FunctionDocumentation::Description description = R"(Decodes a String or FixedString from base64, like base64Decode but returns an empty string in case of an error.)";
FunctionDocumentation::Syntax syntax = "tryBase64Decode(encoded)";
FunctionDocumentation::Arguments arguments = {{"encoded", "String column or constant. If the string is not a valid Base64-encoded value, returns an empty string."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument.";
FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64Decode('Y2xpY2tob3VzZQ==')", "clickhouse"}, {"invalid", "SELECT tryBase64Decode('invalid')", ""}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<TryBase64Decode<Base64Variant::Normal>>>({description, syntax, arguments, returned_value, examples, categories});
}
}

View File

@ -0,0 +1,21 @@
#include <Functions/FunctionBase64Conversion.h>
#if USE_BASE64
#include <Functions/FunctionFactory.h>
namespace DB
{
REGISTER_FUNCTION(TryBase64UrlDecode)
{
FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64UrlDecode but returns an empty string in case of an error.)";
FunctionDocumentation::Syntax syntax = "tryBase64UrlDecode(encodedUrl)";
FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument.";
FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<TryBase64Decode<Base64Variant::Url>>>({description, syntax, arguments, returned_value, examples, categories});
}
}
#endif

View File

@ -51,7 +51,7 @@ ReadBufferFromS3::ReadBufferFromS3(
const String & bucket_,
const String & key_,
const String & version_id_,
const S3Settings::RequestSettings & request_settings_,
const S3::RequestSettings & request_settings_,
const ReadSettings & settings_,
bool use_external_buffer_,
size_t offset_,
@ -318,7 +318,7 @@ size_t ReadBufferFromS3::getFileSize()
if (file_size)
return *file_size;
auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, request_settings);
auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id);
file_size = object_size;
return *file_size;

View File

@ -1,6 +1,6 @@
#pragma once
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include "config.h"
#if USE_AWS_S3
@ -28,7 +28,7 @@ private:
String bucket;
String key;
String version_id;
const S3Settings::RequestSettings request_settings;
const S3::RequestSettings request_settings;
/// These variables are atomic because they can be used for `logging only`
/// (where it is not important to get consistent result)
@ -47,7 +47,7 @@ public:
const String & bucket_,
const String & key_,
const String & version_id_,
const S3Settings::RequestSettings & request_settings_,
const S3::RequestSettings & request_settings_,
const ReadSettings & settings_,
bool use_external_buffer = false,
size_t offset_ = 0,

View File

@ -219,6 +219,9 @@ public:
return client_configuration.for_disk_s3;
}
ThrottlerPtr getPutRequestThrottler() const { return client_configuration.put_request_throttler; }
ThrottlerPtr getGetRequestThrottler() const { return client_configuration.get_request_throttler; }
private:
friend struct ::MockS3::Client;

View File

@ -13,18 +13,12 @@
# include <aws/core/auth/bearer-token-provider/SSOBearerTokenProvider.h>
# include <IO/S3/PocoHTTPClient.h>
# include <IO/S3Defines.h>
namespace DB::S3
{
inline static constexpr uint64_t DEFAULT_EXPIRATION_WINDOW_SECONDS = 120;
inline static constexpr uint64_t DEFAULT_CONNECT_TIMEOUT_MS = 1000;
inline static constexpr uint64_t DEFAULT_REQUEST_TIMEOUT_MS = 30000;
inline static constexpr uint64_t DEFAULT_MAX_CONNECTIONS = 100;
inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_TIMEOUT = 5;
inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_MAX_REQUESTS = 100;
/// In GCP metadata service can be accessed via DNS regardless of IPv4 or IPv6.
static inline constexpr char GCP_METADATA_SERVICE_ENDPOINT[] = "http://metadata.google.internal";

View File

@ -535,7 +535,7 @@ void PocoHTTPClient::makeRequestInternalImpl(
const static std::string_view needle = "<Error>";
if (auto it = std::search(response_string.begin(), response_string.end(), std::default_searcher(needle.begin(), needle.end())); it != response_string.end())
{
LOG_WARNING(log, "Response for request contain <Error> tag in body, settings internal server error (500 code)");
LOG_WARNING(log, "Response for the request contains an <Error> tag in the body, will treat it as an internal server error (code 500)");
response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR);
addMetric(request, S3MetricType::Errors);

View File

@ -56,7 +56,7 @@ namespace
const std::shared_ptr<const S3::Client> & client_ptr_,
const String & dest_bucket_,
const String & dest_key_,
const S3Settings::RequestSettings & request_settings_,
const S3::RequestSettings & request_settings_,
const std::optional<std::map<String, String>> & object_metadata_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
bool for_disk_s3_,
@ -66,7 +66,6 @@ namespace
, dest_bucket(dest_bucket_)
, dest_key(dest_key_)
, request_settings(request_settings_)
, upload_settings(request_settings.getUploadSettings())
, object_metadata(object_metadata_)
, schedule(schedule_)
, for_disk_s3(for_disk_s3_)
@ -81,8 +80,7 @@ namespace
std::shared_ptr<const S3::Client> client_ptr;
const String & dest_bucket;
const String & dest_key;
const S3Settings::RequestSettings & request_settings;
const S3Settings::RequestSettings::PartUploadSettings & upload_settings;
const S3::RequestSettings & request_settings;
const std::optional<std::map<String, String>> & object_metadata;
ThreadPoolCallbackRunnerUnsafe<void> schedule;
bool for_disk_s3;
@ -127,8 +125,8 @@ namespace
if (object_metadata.has_value())
request.SetMetadata(object_metadata.value());
const auto & storage_class_name = upload_settings.storage_class_name;
if (!storage_class_name.empty())
const auto & storage_class_name = request_settings.storage_class_name;
if (!storage_class_name.value.empty())
request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name));
client_ptr->setKMSHeaders(request);
@ -187,7 +185,7 @@ namespace
request.SetMultipartUpload(multipart_upload);
size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
size_t max_retries = std::max<UInt64>(request_settings.max_unexpected_write_error_retries.value, 1UL);
for (size_t retries = 1;; ++retries)
{
ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload);
@ -241,7 +239,7 @@ namespace
void checkObjectAfterUpload()
{
LOG_TRACE(log, "Checking object {} exists after upload", dest_key);
S3::checkObjectExists(*client_ptr, dest_bucket, dest_key, {}, request_settings, "Immediately after upload");
S3::checkObjectExists(*client_ptr, dest_bucket, dest_key, {}, "Immediately after upload");
LOG_TRACE(log, "Object {} exists after upload", dest_key);
}
@ -292,9 +290,9 @@ namespace
if (!total_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. This must not happen");
auto max_part_number = upload_settings.max_part_number;
auto min_upload_part_size = upload_settings.min_upload_part_size;
auto max_upload_part_size = upload_settings.max_upload_part_size;
auto max_part_number = request_settings.max_part_number;
auto min_upload_part_size = request_settings.min_upload_part_size;
auto max_upload_part_size = request_settings.max_upload_part_size;
if (!max_part_number)
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0");
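The hunk above only shows the three limits being read and validated; the derivation that follows is cut off in this diff. A rough sketch, under the assumption of a simple clamp-based policy, of how a part size could be derived from those limits; this is an illustration only and intentionally omits details such as the part-size multiplication ClickHouse applies as an upload grows.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <stdexcept>

// Smallest part size that keeps the part count within max_part_number, clamped to the configured bounds.
uint64_t choosePartSize(uint64_t total_size, uint64_t max_part_number,
                        uint64_t min_upload_part_size, uint64_t max_upload_part_size)
{
    if (max_part_number == 0 || min_upload_part_size == 0 || max_upload_part_size < min_upload_part_size)
        throw std::invalid_argument("invalid multipart upload limits");

    uint64_t required = (total_size + max_part_number - 1) / max_part_number;   // ceil(total / max parts)
    uint64_t part_size = std::clamp(required, min_upload_part_size, max_upload_part_size);

    if ((total_size + part_size - 1) / part_size > max_part_number)
        throw std::invalid_argument("object is too large for the configured limits");
    return part_size;
}

int main()
{
    // 100 GiB object, at most 10000 parts, 16 MiB..5 GiB per part -> 16 MiB parts.
    std::cout << choosePartSize(100ULL << 30, 10000, 16ULL << 20, 5ULL << 30) << '\n';
}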
@ -467,7 +465,7 @@ namespace
const std::shared_ptr<const S3::Client> & client_ptr_,
const String & dest_bucket_,
const String & dest_key_,
const S3Settings::RequestSettings & request_settings_,
const S3::RequestSettings & request_settings_,
const std::optional<std::map<String, String>> & object_metadata_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
bool for_disk_s3_,
@ -481,7 +479,7 @@ namespace
void performCopy()
{
if (size <= upload_settings.max_single_part_upload_size)
if (size <= request_settings.max_single_part_upload_size)
performSinglepartUpload();
else
performMultipartUpload();
@ -514,8 +512,8 @@ namespace
if (object_metadata.has_value())
request.SetMetadata(object_metadata.value());
const auto & storage_class_name = upload_settings.storage_class_name;
if (!storage_class_name.empty())
const auto & storage_class_name = request_settings.storage_class_name;
if (!storage_class_name.value.empty())
request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name));
/// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
@ -526,7 +524,7 @@ namespace
void processPutRequest(S3::PutObjectRequest & request)
{
size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
size_t max_retries = std::max<UInt64>(request_settings.max_unexpected_write_error_retries.value, 1UL);
for (size_t retries = 1;; ++retries)
{
ProfileEvents::increment(ProfileEvents::S3PutObject);
@ -649,7 +647,7 @@ namespace
size_t src_size_,
const String & dest_bucket_,
const String & dest_key_,
const S3Settings::RequestSettings & request_settings_,
const S3::RequestSettings & request_settings_,
const ReadSettings & read_settings_,
const std::optional<std::map<String, String>> & object_metadata_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
@ -679,7 +677,7 @@ namespace
void performCopy()
{
LOG_TEST(log, "Copy object {} to {} using native copy", src_key, dest_key);
if (!supports_multipart_copy || size <= upload_settings.max_single_operation_copy_size)
if (!supports_multipart_copy || size <= request_settings.max_single_operation_copy_size)
performSingleOperationCopy();
else
performMultipartUploadCopy();
@ -716,8 +714,8 @@ namespace
request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
}
const auto & storage_class_name = upload_settings.storage_class_name;
if (!storage_class_name.empty())
const auto & storage_class_name = request_settings.storage_class_name;
if (!storage_class_name.value.empty())
request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name));
/// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
@ -728,7 +726,7 @@ namespace
void processCopyRequest(S3::CopyObjectRequest & request)
{
size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
size_t max_retries = std::max<UInt64>(request_settings.max_unexpected_write_error_retries.value, 1UL);
for (size_t retries = 1;; ++retries)
{
ProfileEvents::increment(ProfileEvents::S3CopyObject);
@ -852,7 +850,7 @@ void copyDataToS3File(
const std::shared_ptr<const S3::Client> & dest_s3_client,
const String & dest_bucket,
const String & dest_key,
const S3Settings::RequestSettings & settings,
const S3::RequestSettings & settings,
BlobStorageLogWriterPtr blob_storage_log,
const std::optional<std::map<String, String>> & object_metadata,
ThreadPoolCallbackRunnerUnsafe<void> schedule,
@ -883,7 +881,7 @@ void copyS3File(
std::shared_ptr<const S3::Client> dest_s3_client,
const String & dest_bucket,
const String & dest_key,
const S3Settings::RequestSettings & settings,
const S3::RequestSettings & settings,
const ReadSettings & read_settings,
BlobStorageLogWriterPtr blob_storage_log,
const std::optional<std::map<String, String>> & object_metadata,

View File

@ -4,7 +4,7 @@
#if USE_AWS_S3
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <Common/threadPoolCallbackRunner.h>
#include <IO/S3/BlobStorageLogWriter.h>
#include <base/types.h>
@ -39,7 +39,7 @@ void copyS3File(
std::shared_ptr<const S3::Client> dest_s3_client,
const String & dest_bucket,
const String & dest_key,
const S3Settings::RequestSettings & settings,
const S3::RequestSettings & settings,
const ReadSettings & read_settings,
BlobStorageLogWriterPtr blob_storage_log,
const std::optional<std::map<String, String>> & object_metadata = std::nullopt,
@ -58,7 +58,7 @@ void copyDataToS3File(
const std::shared_ptr<const S3::Client> & dest_s3_client,
const String & dest_bucket,
const String & dest_key,
const S3Settings::RequestSettings & settings,
const S3::RequestSettings & settings,
BlobStorageLogWriterPtr blob_storage_log,
const std::optional<std::map<String, String>> & object_metadata = std::nullopt,
ThreadPoolCallbackRunnerUnsafe<void> schedule_ = {},

View File

@ -44,7 +44,7 @@ namespace
/// Performs a request to get the size and last modification time of an object.
std::pair<std::optional<ObjectInfo>, Aws::S3::S3Error> tryGetObjectInfo(
const S3::Client & client, const String & bucket, const String & key, const String & version_id,
const S3Settings::RequestSettings & /*request_settings*/, bool with_metadata)
bool with_metadata)
{
auto outcome = headObject(client, bucket, key, version_id);
if (!outcome.IsSuccess())
@ -73,11 +73,10 @@ ObjectInfo getObjectInfo(
const String & bucket,
const String & key,
const String & version_id,
const S3Settings::RequestSettings & request_settings,
bool with_metadata,
bool throw_on_error)
{
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, with_metadata);
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, with_metadata);
if (object_info)
{
return *object_info;
@ -96,20 +95,18 @@ size_t getObjectSize(
const String & bucket,
const String & key,
const String & version_id,
const S3Settings::RequestSettings & request_settings,
bool throw_on_error)
{
return getObjectInfo(client, bucket, key, version_id, request_settings, {}, throw_on_error).size;
return getObjectInfo(client, bucket, key, version_id, {}, throw_on_error).size;
}
bool objectExists(
const S3::Client & client,
const String & bucket,
const String & key,
const String & version_id,
const S3Settings::RequestSettings & request_settings)
const String & version_id)
{
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, {});
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, {});
if (object_info)
return true;
@ -126,10 +123,9 @@ void checkObjectExists(
const String & bucket,
const String & key,
const String & version_id,
const S3Settings::RequestSettings & request_settings,
std::string_view description)
{
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, {});
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, {});
if (object_info)
return;
throw S3Exception(error.GetErrorType(), "{}Object {} in bucket {} suddenly disappeared: {}",

View File

@ -3,7 +3,7 @@
#include "config.h"
#if USE_AWS_S3
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <base/types.h>
#include <IO/S3/Client.h>
@ -24,7 +24,6 @@ ObjectInfo getObjectInfo(
const String & bucket,
const String & key,
const String & version_id = {},
const S3Settings::RequestSettings & request_settings = {},
bool with_metadata = false,
bool throw_on_error = true);
@ -33,15 +32,13 @@ size_t getObjectSize(
const String & bucket,
const String & key,
const String & version_id = {},
const S3Settings::RequestSettings & request_settings = {},
bool throw_on_error = true);
bool objectExists(
const S3::Client & client,
const String & bucket,
const String & key,
const String & version_id = {},
const S3Settings::RequestSettings & request_settings = {});
const String & version_id = {});
/// Throws an exception if a specified object doesn't exist. `description` is used as a part of the error message.
void checkObjectExists(
@ -49,7 +46,6 @@ void checkObjectExists(
const String & bucket,
const String & key,
const String & version_id = {},
const S3Settings::RequestSettings & request_settings = {},
std::string_view description = {});
bool isNotFoundError(Aws::S3::S3Errors error);

View File

@ -25,7 +25,7 @@
#include <IO/S3Common.h>
#include <IO/S3/Client.h>
#include <IO/HTTPHeaderEntries.h>
#include <Storages/StorageS3Settings.h>
#include <IO/S3Settings.h>
#include <Poco/Util/ServerApplication.h>
#include "TestPocoHTTPServer.h"
@ -69,7 +69,7 @@ void doReadRequest(std::shared_ptr<const DB::S3::Client> client, const DB::S3::U
UInt64 max_single_read_retries = 1;
DB::ReadSettings read_settings;
DB::S3Settings::RequestSettings request_settings;
DB::S3::RequestSettings request_settings;
request_settings.max_single_read_retries = max_single_read_retries;
DB::ReadBufferFromS3 read_buffer(
client,
@ -88,7 +88,7 @@ void doWriteRequest(std::shared_ptr<const DB::S3::Client> client, const DB::S3::
{
UInt64 max_unexpected_write_error_retries = 1;
DB::S3Settings::RequestSettings request_settings;
DB::S3::RequestSettings request_settings;
request_settings.max_unexpected_write_error_retries = max_unexpected_write_error_retries;
DB::WriteBufferFromS3 write_buffer(
client,

Some files were not shown because too many files have changed in this diff.