Merge branch 'master' into variant-new-serialization

Kruglov Pavel 2024-06-18 21:17:40 +02:00 committed by GitHub
commit e53fc186b5
193 changed files with 7012 additions and 2426 deletions

View File

@@ -48,9 +48,7 @@ At a minimum, the following information should be added (but add more as needed)
 - [ ] <!---ci_include_stateful--> Allow: Stateful tests
 - [ ] <!---ci_include_integration--> Allow: Integration Tests
 - [ ] <!---ci_include_performance--> Allow: Performance tests
-- [ ] <!---ci_set_normal_builds--> Allow: Normal Builds
-- [ ] <!---ci_set_special_builds--> Allow: Special Builds
-- [ ] <!---ci_set_non_required--> Allow: All NOT Required Checks
+- [ ] <!---ci_set_builds--> Allow: All Builds
 - [ ] <!---batch_0_1--> Allow: batch 1, 2 for multi-batch jobs
 - [ ] <!---batch_2_3--> Allow: batch 3, 4, 5, 6 for multi-batch jobs
 ---
@@ -61,6 +59,7 @@ At a minimum, the following information should be added (but add more as needed)
 - [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
 ---
 - [ ] <!---do_not_test--> Do not test
+- [ ] <!---woolen_wolfdog--> Woolen Wolfdog
 - [ ] <!---upload_all--> Upload binaries for special builds
 - [ ] <!---no_merge_commit--> Disable merge-commit
 - [ ] <!---no_ci_cache--> Disable CI cache

View File

@@ -70,7 +70,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Compatibility check (amd64)
+      test_name: Compatibility check (release)
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
   CompatibilityCheckAarch64:
@@ -194,7 +194,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Install packages (amd64)
+      test_name: Install packages (release)
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
       run_command: |
@@ -204,7 +204,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Install packages (arm64)
+      test_name: Install packages (aarch64)
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
       run_command: |

View File

@@ -115,25 +115,16 @@ jobs:
     data: ${{ needs.RunConfig.outputs.data }}
 ################################# Reports #################################
-  # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2)
-  Builds_1_Report:
+  # Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3
+  Builds_Report:
     # run report check for failed builds to indicate the CI error
     if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
-    needs: [RunConfig, Builds_1]
+    needs: [RunConfig, Builds_1, Builds_2]
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: ClickHouse build check
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
-  Builds_2_Report:
-    # run report check for failed builds to indicate the CI error
-    if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
-    needs: [RunConfig, Builds_2]
-    uses: ./.github/workflows/reusable_test.yml
-    with:
-      test_name: ClickHouse special build check
-      runner_type: style-checker-aarch64
-      data: ${{ needs.RunConfig.outputs.data }}
   MarkReleaseReady:
     if: ${{ !failure() && !cancelled() }}
@@ -165,7 +156,7 @@ jobs:
   FinishCheck:
     if: ${{ !cancelled() }}
-    needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
+    needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Check out repository code

View File

@@ -126,8 +126,9 @@ jobs:
     with:
       stage: Builds_2
       data: ${{ needs.RunConfig.outputs.data }}
+  # stage for running non-required checks without being blocked by required checks (Test_1) if corresponding settings is selected
   Tests_2:
-    needs: [RunConfig, Builds_2]
+    needs: [RunConfig, Builds_1]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
     uses: ./.github/workflows/reusable_test_stage.yml
     with:
@@ -143,29 +144,20 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}
 ################################# Reports #################################
-  # Reports should by run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2)
-  Builds_1_Report:
+  # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
+  Builds_Report:
     # run report check for failed builds to indicate the CI error
-    if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
-    needs: [RunConfig, StyleCheck, Builds_1]
+    if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
+    needs: [RunConfig, StyleCheck, Builds_1, Builds_2]
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: ClickHouse build check
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
-  Builds_2_Report:
-    # run report check for failed builds to indicate the CI error
-    if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
-    needs: [RunConfig, StyleCheck, Builds_2]
-    uses: ./.github/workflows/reusable_test.yml
-    with:
-      test_name: ClickHouse special build check
-      runner_type: style-checker-aarch64
-      data: ${{ needs.RunConfig.outputs.data }}
   CheckReadyForMerge:
     if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
-    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
+    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2]
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Check out repository code
@@ -181,7 +173,7 @@ jobs:
   #
   FinishCheck:
     if: ${{ !cancelled() }}
-    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
+    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Check out repository code

View File

@@ -65,7 +65,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Compatibility check (amd64)
+      test_name: Compatibility check (release)
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
   CompatibilityCheckAarch64:
@@ -244,7 +244,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Install packages (amd64)
+      test_name: Install packages (release)
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
       run_command: |
@@ -254,7 +254,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Install packages (arm64)
+      test_name: Install packages (aarch64)
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
       run_command: |

View File

@@ -26,7 +26,7 @@ namespace Poco
 {
 namespace Net
 {
-    constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024;
+    constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 1024 * 1024;

     typedef Poco::BasicBufferedStreamBuf<char, std::char_traits<char>> HTTPBasicStreamBuf;

View File

@@ -330,27 +330,26 @@ void SSLManager::initDefaultContext(bool server)
     else
         _ptrDefaultClientContext->disableProtocols(disabledProtocols);

-    /// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues.
-    /// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
-    /// if (server)
-    /// {
-    ///     std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
-    ///     _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
-    ///     if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
-    ///     {
-    ///         int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
-    ///         _ptrDefaultServerContext->setSessionCacheSize(cacheSize);
-    ///     }
-    ///     if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
-    ///     {
-    ///         int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
-    ///         _ptrDefaultServerContext->setSessionTimeout(timeout);
-    ///     }
-    /// }
-    /// else
-    /// {
-    ///     _ptrDefaultClientContext->enableSessionCache(cacheSessions);
-    /// }
+    bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
+    if (server)
+    {
+        std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
+        _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
+        if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
+        {
+            int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
+            _ptrDefaultServerContext->setSessionCacheSize(cacheSize);
+        }
+        if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
+        {
+            int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
+            _ptrDefaultServerContext->setSessionTimeout(timeout);
+        }
+    }
+    else
+    {
+        _ptrDefaultClientContext->enableSessionCache(cacheSessions);
+    }

     bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false);
     if (server)
         _ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification);

contrib/openssl vendored

@@ -1 +1 @@
-Subproject commit 67c0b63e578e4c751ac9edf490f5a96124fff8dc
+Subproject commit e0d6ae2bf93cf6dc26bb86aa39992bc6a410869a

View File

@@ -254,7 +254,7 @@ function run_tests()
     set +e
     clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
-        --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
+        --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
         | ts '%Y-%m-%d %H:%M:%S' \
         | tee -a test_output/test_result.txt
     set -e
@@ -379,6 +379,10 @@ fi
 tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
+rm -rf /var/lib/clickhouse/data/system/*/
+tar -chf /test_output/store.tar /var/lib/clickhouse/store ||:
+tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||:
+
 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
     rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||:
     rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server2.log ||:

View File

@@ -89,10 +89,6 @@ function configure()
     # since we run clickhouse from root
     sudo chown root: /var/lib/clickhouse
-    # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
-    echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
-        > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
-
     local total_mem
     total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
     total_mem=$(( total_mem*1024 )) # bytes

View File

@@ -1490,6 +1490,8 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000
 - [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`.
 - [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`.
 - [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `true`.
+- [output_format_pretty_display_footer_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names) - Display column names in the footer if table contains many rows. Default value - `true`.
+- [output_format_pretty_display_footer_column_names_min_rows](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names_min_rows) - Sets the minimum number of rows for which a footer will be displayed if [output_format_pretty_display_footer_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names) is enabled. Default value - 50.

 ## RowBinary {#rowbinary}

View File

@@ -508,7 +508,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
 - `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2's regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.

-- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
+- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `http_response_headers`, `response_content`, `query`, `query_param_name`.
     `type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).

     - `query` — use with `predefined_query_handler` type, executes query when the handler is called.
@@ -519,6 +519,8 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
     - `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).

+    - `http_response_headers` — use with any type, response headers map. Could be used to set content type as well.
+
     - `response_content` — use with `static` type, response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration sends to client.

 Next are the configuration methods for different `type`.
@@ -616,6 +618,33 @@ Return a message.
             <type>static</type>
             <status>402</status>
             <content_type>text/html; charset=UTF-8</content_type>
+            <http_response_headers>
+                <Content-Language>en</Content-Language>
+                <X-My-Custom-Header>43</X-My-Custom-Header>
+            </http_response_headers>
+            <response_content>Say Hi!</response_content>
+        </handler>
+    </rule>
+    <defaults/>
+</http_handlers>
+```
+
+`http_response_headers` could be used to set content type instead of `content_type`.
+
+``` xml
+<http_handlers>
+    <rule>
+        <methods>GET</methods>
+        <headers><XXX>xxx</XXX></headers>
+        <url>/hi</url>
+        <handler>
+            <type>static</type>
+            <status>402</status>
+            <http_response_headers>
+                <Content-Type>text/html; charset=UTF-8</Content-Type>
+                <Content-Language>en</Content-Language>
+                <X-My-Custom-Header>43</X-My-Custom-Header>
+            </http_response_headers>
             <response_content>Say Hi!</response_content>
         </handler>
     </rule>
@@ -696,6 +725,9 @@ Find the content from the file send to client.
         <handler>
             <type>static</type>
             <content_type>text/html; charset=UTF-8</content_type>
+            <http_response_headers>
+                <ETag>737060cd8c284d8af7ad3082f209582d</ETag>
+            </http_response_headers>
             <response_content>file:///absolute_path_file.html</response_content>
         </handler>
     </rule>
@@ -706,6 +738,9 @@ Find the content from the file send to client.
         <handler>
             <type>static</type>
             <content_type>text/html; charset=UTF-8</content_type>
+            <http_response_headers>
+                <ETag>737060cd8c284d8af7ad3082f209582d</ETag>
+            </http_response_headers>
             <response_content>file://./relative_path_file.html</response_content>
         </handler>
     </rule>

View File

@@ -59,10 +59,10 @@ For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/je
 If that's the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool.

 ```
-git clone https://github.com/gimli-rs/addr2line
+git clone https://github.com/gimli-rs/addr2line.git --depth=1 --branch=0.23.0
 cd addr2line
-cargo b --examples -r
-cp ./target/release/examples/addr2line path/to/current/addr2line
+cargo build --features bin --release
+cp ./target/release/addr2line path/to/current/addr2line
 ```
 :::

View File

@@ -3084,3 +3084,21 @@ This setting is only necessary for the migration period and will become obsolete
 Type: Bool

 Default: 1
+
+## merge_workload {#merge_workload}
+
+Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for all background merges. Can be overridden by a merge tree setting.
+
+Default value: "default"
+
+**See Also**
+- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
+
+## mutation_workload {#mutation_workload}
+
+Used to regulate how resources are utilized and shared between mutations and other workloads. Specified value is used as `workload` setting value for all background mutations. Can be overridden by a merge tree setting.
+
+Default value: "default"
+
+**See Also**
+- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
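
As a quick way to inspect the server-wide values added above, a sketch (it assumes the `system.server_settings` table available in recent ClickHouse releases):

```sql
-- Server-wide defaults used for all background merges and mutations.
SELECT name, value
FROM system.server_settings
WHERE name IN ('merge_workload', 'mutation_workload');
```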

View File

@@ -981,6 +981,24 @@ This mode allows to use significantly less memory for storing discriminators in

 Default value: true

+## merge_workload
+
+Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.
+
+Default value: an empty string
+
+**See Also**
+- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
+
+## mutation_workload
+
+Used to regulate how resources are utilized and shared between mutations and other workloads. Specified value is used as `workload` setting value for background mutations of this table. If not specified (empty string), then server setting `mutation_workload` is used instead.
+
+Default value: an empty string
+
+**See Also**
+- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)
+
 ### optimize_row_order

 Controls if the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
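
To illustrate the table-level override of `merge_workload` and `mutation_workload` described above, a sketch with a hypothetical table:

```sql
CREATE TABLE orders
(
    id UInt64,
    payload String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS merge_workload = 'production_merges';

-- The override can also be changed after creation:
ALTER TABLE orders MODIFY SETTING mutation_workload = 'maintenance';
```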

View File

@@ -1706,6 +1706,43 @@ Result:
 └────────────┘
 ```

+## output_format_pretty_display_footer_column_names
+
+Display column names in the footer if there are many table rows.
+
+Possible values:
+
+- 0 — No column names are displayed in the footer.
+- 1 — Column names are displayed in the footer if row count is greater than or equal to the threshold value set by [output_format_pretty_display_footer_column_names_min_rows](#output_format_pretty_display_footer_column_names_min_rows) (50 by default).
+
+Default value: `1`.
+
+**Example**
+
+Query:
+
+```sql
+SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 1000);
+```
+
+Result:
+
+```response
+      ┌─number─┬─toTypeName(number)─┐
+   1. │      0 │ UInt64             │
+   2. │      1 │ UInt64             │
+   3. │      2 │ UInt64             │
+   ...
+ 999. │    998 │ UInt64             │
+1000. │    999 │ UInt64             │
+      └─number─┴─toTypeName(number)─┘
+```
+
+## output_format_pretty_display_footer_column_names_min_rows
+
+Sets the minimum number of rows for which a footer with column names will be displayed if setting [output_format_pretty_display_footer_column_names](#output_format_pretty_display_footer_column_names) is enabled.
+
+Default value: `50`.

 ## Template format settings {#template-format-settings}

 ### format_template_resultset {#format_template_resultset}
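
Like the other pretty-format settings added above, `output_format_pretty_display_footer_column_names_min_rows` can be set per query; an illustrative sketch:

```sql
-- Show the footer already for result sets of 20+ rows instead of the default 50.
SELECT * FROM system.numbers LIMIT 30
SETTINGS output_format_pretty_display_footer_column_names_min_rows = 20;
```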

View File

@@ -47,6 +47,8 @@ Example:

 Queries can be marked with setting `workload` to distinguish different workloads. If `workload` is not set, then the value "default" is used. Note that you are able to specify another value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from the user to be marked with a fixed value of the `workload` setting.

+It is possible to assign a `workload` setting for background activities. Merges and mutations use the `merge_workload` and `mutation_workload` server settings correspondingly. These values can also be overridden for specific tables using the `merge_workload` and `mutation_workload` merge tree settings.
+
 Let's consider an example of a system with two different workloads: "production" and "development".

 ```sql
@@ -151,6 +153,9 @@ Example:
 </clickhouse>
 ```

 ## See also
 - [system.scheduler](/docs/en/operations/system-tables/scheduler.md)
+ - [merge_workload](/docs/en/operations/settings/merge-tree-settings.md#merge_workload) merge tree setting
+ - [merge_workload](/docs/en/operations/server-configuration-parameters/settings.md#merge_workload) global server setting
+ - [mutation_workload](/docs/en/operations/settings/merge-tree-settings.md#mutation_workload) merge tree setting
+ - [mutation_workload](/docs/en/operations/server-configuration-parameters/settings.md#mutation_workload) global server setting
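
A sketch of how queries pick up a workload, as described in the section above (the table name is hypothetical):

```sql
-- Mark every query in this session:
SET workload = 'development';

-- Or mark a single query explicitly:
SELECT count() FROM orders SETTINGS workload = 'production';
```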

View File

@@ -0,0 +1,90 @@
---
slug: /en/sql-reference/aggregate-functions/reference/groupconcat
sidebar_position: 363
sidebar_label: groupConcat
title: groupConcat
---

Calculates a concatenated string from a group of strings, optionally separated by a delimiter, and optionally limited by a maximum number of elements.

**Syntax**

``` sql
groupConcat(expression [, delimiter] [, limit]);
```

**Arguments**

- `expression` — The expression or column name that outputs strings to be concatenated.
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.

:::note
If delimiter is specified without limit, it must be the first parameter following the expression. If both delimiter and limit are specified, delimiter must precede limit.
:::

**Returned value**

- Returns a [string](../../../sql-reference/data-types/string.md) consisting of the concatenated values of the column or expression. If the group has no elements or only null elements, and the function does not specify a handling for only null values, the result is a nullable string with a null value.

**Examples**

Input table:

``` text
┌─id─┬─name─┐
│  1 │ John │
│  2 │ Jane │
│  3 │ Bob  │
└────┴──────┘
```

1. Basic usage without a delimiter:

Query:

``` sql
SELECT groupConcat(Name) FROM Employees;
```

Result:

``` text
JohnJaneBob
```

This concatenates all names into one continuous string without any separator.

2. Using a comma as a delimiter:

Query:

``` sql
SELECT groupConcat(Name, ', ') FROM Employees;
```

Result:

``` text
John, Jane, Bob
```

This output shows the names separated by a comma followed by a space.

3. Limiting the number of concatenated elements:

Query:

``` sql
SELECT groupConcat(Name, ', ', 2) FROM Employees;
```

Result:

``` text
John, Jane
```

This query limits the output to the first two names, even though there are more names in the table.
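
One more hedged illustration of the null handling described under **Returned value**: a group consisting only of NULLs should yield NULL rather than an empty string.

```sql
-- The only element is NULL, so there is nothing to concatenate; result is NULL.
SELECT groupConcat(s) FROM (SELECT CAST(NULL, 'Nullable(String)') AS s);
```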

View File

@@ -2178,6 +2178,32 @@ Result:

 Alias: levenshteinDistance

+## editDistanceUTF8
+
+Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings.
+
+**Syntax**
+
+```sql
+editDistanceUTF8(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT editDistanceUTF8('我是谁', '我是我');
+```
+
+Result:
+
+``` text
+┌─editDistanceUTF8('我是谁', '我是我')──┐
+│                                   1 │
+└─────────────────────────────────────┘
+```
+
+Alias: levenshteinDistanceUTF8
+
 ## damerauLevenshteinDistance

 Calculates the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) between two byte strings.

View File

@@ -414,6 +414,8 @@ $ curl -v 'http://localhost:8123/predefined_query'
 - `content_type` — used with all types; sets the response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).

+- `http_response_headers` — used with all types to add custom headers to the response. Can also be used to set the `Content-Type` header instead of `content_type`.
+
 - `response_content` — used with the `static` type; the response content sent to the client. When using the file:// or config:// prefix, the content is taken from the file or configuration and sent to the client.

 Next are the configuration methods for the different types.
@@ -509,6 +511,33 @@ max_final_threads 2
             <type>static</type>
             <status>402</status>
             <content_type>text/html; charset=UTF-8</content_type>
+            <http_response_headers>
+                <Content-Language>en</Content-Language>
+                <X-My-Custom-Header>43</X-My-Custom-Header>
+            </http_response_headers>
+            <response_content>Say Hi!</response_content>
+        </handler>
+    </rule>
+    <defaults/>
+</http_handlers>
+```
+
+`http_response_headers` can likewise be used to set `Content-Type` instead of `content_type`.
+
+``` xml
+<http_handlers>
+    <rule>
+        <methods>GET</methods>
+        <headers><XXX>xxx</XXX></headers>
+        <url>/hi</url>
+        <handler>
+            <type>static</type>
+            <status>402</status>
+            <http_response_headers>
+                <Content-Type>text/html; charset=UTF-8</Content-Type>
+                <Content-Language>en</Content-Language>
+                <X-My-Custom-Header>43</X-My-Custom-Header>
+            </http_response_headers>
             <response_content>Say Hi!</response_content>
         </handler>
     </rule>
@@ -589,6 +618,9 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
         <handler>
             <type>static</type>
             <content_type>text/html; charset=UTF-8</content_type>
+            <http_response_headers>
+                <ETag>737060cd8c284d8af7ad3082f209582d</ETag>
+            </http_response_headers>
             <response_content>file:///absolute_path_file.html</response_content>
         </handler>
     </rule>
@@ -599,6 +631,9 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
         <handler>
             <type>static</type>
             <content_type>text/html; charset=UTF-8</content_type>
+            <http_response_headers>
+                <ETag>737060cd8c284d8af7ad3082f209582d</ETag>
+            </http_response_headers>
             <response_content>file://./relative_path_file.html</response_content>
         </handler>
     </rule>

View File

@@ -1609,6 +1609,10 @@ try
             0, // We don't need any threads once all the parts will be deleted
             new_server_settings.max_parts_cleaning_thread_pool_size);

+        global_context->setMergeWorkload(new_server_settings.merge_workload);
+        global_context->setMutationWorkload(new_server_settings.mutation_workload);
+
         if (config->has("resources"))
         {
             global_context->getResourceManager()->updateConfiguration(*config);

View File

@@ -371,7 +371,7 @@
     <!-- Enables asynchronous loading of databases and tables to speedup server startup.
          Queries to not yet loaded entity will be blocked until load is finished.
     -->
-    <!-- <async_load_databases>true</async_load_databases> -->
+    <async_load_databases>true</async_load_databases>

     <!-- On memory constrained environments you may have to set this to value larger than 1.
     -->
@@ -1396,6 +1396,14 @@
         <!-- <host_name>replica</host_name> -->
     </distributed_ddl>

+    <!-- Used to regulate how resources are utilized and shared between merges, mutations and other workloads.
+         Specified value is used as `workload` setting value for background merge or mutation.
+    -->
+    <!--
+    <merge_workload>merges_and_mutations</merge_workload>
+    <mutation_workload>merges_and_mutations</mutation_workload>
+    -->
+
     <!-- Settings to fine-tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
     <!--
     <merge_tree>

View File

@@ -31,6 +31,7 @@ namespace DB
 {
 namespace ErrorCodes
 {
+    extern const int AUTHENTICATION_FAILED;
     extern const int SUPPORT_IS_DISABLED;
     extern const int BAD_ARGUMENTS;
     extern const int LOGICAL_ERROR;
@@ -90,8 +91,10 @@ bool AuthenticationData::Util::checkPasswordBcrypt(std::string_view password [[m
 {
 #if USE_BCRYPT
     int ret = bcrypt_checkpw(password.data(), reinterpret_cast<const char *>(password_bcrypt.data()));
+    /// Before 24.6 we didn't validate hashes on creation, so it could be that the stored hash is invalid
+    /// and it could not be decoded by the library
     if (ret == -1)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "BCrypt library failed: bcrypt_checkpw returned {}", ret);
+        throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Internal failure decoding Bcrypt hash");

     return (ret == 0);
 #else
     throw Exception(
@@ -230,6 +233,17 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
                             "Password hash for the 'BCRYPT_PASSWORD' authentication type has length {} "
                             "but must be 59 or 60 bytes.", hash.size());
+
+        auto resized = hash;
+        resized.resize(64);
+
+#if USE_BCRYPT
+        /// Verify that it is a valid hash
+        int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
+        if (ret == -1)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
+#endif
+
         password_hash = hash;
         password_hash.resize(64);
         return;
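
The hash being validated here is the one users can supply directly when creating an account; e.g. (the hash literal below is a truncated placeholder, not a real digest):

```sql
-- With this change the hash is validated at CREATE time and rejected with
-- BAD_ARGUMENTS if bcrypt cannot decode it; legacy invalid hashes now fail
-- authentication with AUTHENTICATION_FAILED instead of LOGICAL_ERROR.
CREATE USER alice IDENTIFIED WITH bcrypt_hash BY '$2a$12$...';
```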

View File

@@ -0,0 +1,265 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Core/ServerSettings.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int BAD_ARGUMENTS;
}

namespace
{

struct GroupConcatDataBase
{
    UInt64 data_size = 0;
    UInt64 allocated_size = 0;
    char * data = nullptr;

    void checkAndUpdateSize(UInt64 add, Arena * arena)
    {
        if (data_size + add >= allocated_size)
        {
            auto old_size = allocated_size;
            allocated_size = std::max(2 * allocated_size, data_size + add);
            data = arena->realloc(data, old_size, allocated_size);
        }
    }

    void insertChar(const char * str, UInt64 str_size, Arena * arena)
    {
        checkAndUpdateSize(str_size, arena);
        memcpy(data + data_size, str, str_size);
        data_size += str_size;
    }
};

struct GroupConcatData : public GroupConcatDataBase
{
    using Offset = UInt64;
    using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
    using Offsets = PODArray<Offset, 32, Allocator>;

    /// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
    Offsets offsets;
    UInt64 num_rows = 0;

    UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }

    UInt64 getString(size_t i) const { return offsets[i * 2]; }

    void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
    {
        WriteBufferFromOwnString buff;
        serialization->serializeText(*column, row_num, buff, {});
        auto string = buff.stringView();

        checkAndUpdateSize(string.size(), arena);
        memcpy(data + data_size, string.data(), string.size());
        offsets.push_back(data_size, arena);
        data_size += string.size();
        offsets.push_back(data_size, arena);
        num_rows++;
    }
};

template <bool has_limit>
class GroupConcatImpl final
    : public IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>
{
    static constexpr auto name = "groupConcat";

    SerializationPtr serialization;
    UInt64 limit;
    const String delimiter;

public:
    GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
        : IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>(
            {data_type_}, parameters_, std::make_shared<DataTypeString>())
        , serialization(this->argument_types[0]->getDefaultSerialization())
        , limit(limit_)
        , delimiter(delimiter_)
    {
    }

    String getName() const override { return name; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & cur_data = this->data(place);

        if constexpr (has_limit)
            if (cur_data.num_rows >= limit)
                return;

        if (cur_data.data_size != 0)
            cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);

        cur_data.insert(columns[0], serialization, row_num, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_data = this->data(place);
        auto & rhs_data = this->data(rhs);

        if (rhs_data.data_size == 0)
            return;

        if constexpr (has_limit)
        {
            UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
            for (UInt64 i = 0; i < new_elems_count; ++i)
            {
                if (cur_data.data_size != 0)
                    cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);

                cur_data.offsets.push_back(cur_data.data_size, arena);
                cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
                cur_data.num_rows++;
                cur_data.offsets.push_back(cur_data.data_size, arena);
            }
        }
        else
        {
            if (cur_data.data_size != 0)
                cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);

            cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
        }
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & cur_data = this->data(place);

        writeVarUInt(cur_data.data_size, buf);
        writeVarUInt(cur_data.allocated_size, buf);

        buf.write(cur_data.data, cur_data.data_size);

        if constexpr (has_limit)
        {
            writeVarUInt(cur_data.num_rows, buf);
            for (const auto & offset : cur_data.offsets)
                writeVarUInt(offset, buf);
        }
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        auto & cur_data = this->data(place);

        readVarUInt(cur_data.data_size, buf);
        readVarUInt(cur_data.allocated_size, buf);

        buf.readStrict(cur_data.data, cur_data.data_size);

        if constexpr (has_limit)
        {
            readVarUInt(cur_data.num_rows, buf);
            cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
            for (auto & offset : cur_data.offsets)
                readVarUInt(offset, buf);
        }
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & cur_data = this->data(place);

        if (cur_data.data_size == 0)
        {
            auto column_nullable = IColumn::mutate(makeNullable(to.getPtr()));
            column_nullable->insertDefault();
            return;
        }

        auto & column_string = assert_cast<ColumnString &>(to);
        column_string.insertData(cur_data.data, cur_data.data_size);
    }

    bool allocatesMemoryInArena() const override { return true; }
};

AggregateFunctionPtr createAggregateFunctionGroupConcat(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertUnary(name, argument_types);

    bool has_limit = false;
    UInt64 limit = 0;
    String delimiter;

    if (parameters.size() > 2)
        throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
            "Incorrect number of parameters for aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size());

    if (!parameters.empty())
    {
        auto type = parameters[0].getType();
        if (type != Field::Types::String)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);

        delimiter = parameters[0].get<String>();
    }
    if (parameters.size() == 2)
    {
        auto type = parameters[1].getType();

        if (type != Field::Types::Int64 && type != Field::Types::UInt64)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);

        if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
            (type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());

        has_limit = true;
        limit = parameters[1].get<UInt64>();
    }

    if (has_limit)
        return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types[0], parameters, limit, delimiter);
    else
        return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
}

}

void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
    factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
    factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
}

}

View File

@@ -19,6 +19,7 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor
 void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
 void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
 void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &);
+void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &);
 void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
 void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
 void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
@@ -120,6 +121,7 @@ void registerAggregateFunctions()
         registerAggregateFunctionGroupUniqArray(factory);
         registerAggregateFunctionGroupArrayInsertAt(factory);
         registerAggregateFunctionGroupArrayIntersect(factory);
+        registerAggregateFunctionGroupConcat(factory);
         registerAggregateFunctionsQuantile(factory);
         registerAggregateFunctionsQuantileDeterministic(factory);
         registerAggregateFunctionsQuantileExact(factory);

View File

@@ -985,18 +985,18 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
     {
         result_aggregate_function_name = settings.count_distinct_implementation;
     }
-    else if (aggregate_function_name_lowercase == "countdistinctif" || aggregate_function_name_lowercase == "countifdistinct")
+    else if (aggregate_function_name_lowercase == "countifdistinct" ||
+        (settings.rewrite_count_distinct_if_with_count_distinct_implementation && aggregate_function_name_lowercase == "countdistinctif"))
     {
         result_aggregate_function_name = settings.count_distinct_implementation;
         result_aggregate_function_name += "If";
     }
-    else if (aggregate_function_name_lowercase.ends_with("ifdistinct"))
+
+    /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
+    if (result_aggregate_function_name.ends_with("ifdistinct"))
     {
-        /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
         size_t prefix_length = result_aggregate_function_name.size() - strlen("ifdistinct");
         result_aggregate_function_name = result_aggregate_function_name.substr(0, prefix_length) + "DistinctIf";
     }

     bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
     if (need_add_or_null)
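
For reference, the behavior driven by the settings named in this hunk, as a hedged SQL sketch:

```sql
-- With the rewrite enabled, countDistinctIf(x, cond) is resolved to
-- <count_distinct_implementation>If, e.g. uniqExactIf(x, cond).
SET count_distinct_implementation = 'uniqExact';
SET rewrite_count_distinct_if_with_count_distinct_implementation = 1;

SELECT countDistinctIf(number % 3, number > 10) FROM numbers(100);
```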

View File

@@ -9,6 +9,8 @@
 #include <Interpreters/convertFieldToType.h>
 #include <Interpreters/Set.h>

+#include <Common/assert_cast.h>
+
 namespace DB
 {
@@ -54,8 +56,9 @@ size_t getCompoundTypeDepth(const IDataType & type)
 }

 template <typename Collection>
-Block createBlockFromCollection(const Collection & collection, const DataTypes & block_types, bool transform_null_in)
+Block createBlockFromCollection(const Collection & collection, const DataTypes & value_types, const DataTypes & block_types, bool transform_null_in)
 {
+    assert(collection.size() == value_types.size());
     size_t columns_size = block_types.size();
     MutableColumns columns(columns_size);
     for (size_t i = 0; i < columns_size; ++i)
@@ -66,13 +69,17 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes &
     Row tuple_values;
-    for (const auto & value : collection)
+    for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index)
     {
+        const auto & value = collection[collection_index];
         if (columns_size == 1)
         {
-            auto field = convertFieldToTypeStrict(value, *block_types[0]);
+            const DataTypePtr & data_type = value_types[collection_index];
+            auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]);
             if (!field)
+            {
                 continue;
+            }

             bool need_insert_null = transform_null_in && block_types[0]->isNullable();
             if (!field->isNull() || need_insert_null)
@@ -87,6 +94,9 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes &
                 value.getTypeName());

         const auto & tuple = value.template get<const Tuple &>();
+        const DataTypePtr & value_type = value_types[collection_index];
+        const DataTypes & tuple_value_type = typeid_cast<const DataTypeTuple *>(value_type.get())->getElements();
+
         size_t tuple_size = tuple.size();

         if (tuple_size != columns_size)
@@ -101,7 +111,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes &
         size_t i = 0;
         for (; i < tuple_size; ++i)
         {
-            auto converted_field = convertFieldToTypeStrict(tuple[i], *block_types[i]);
+            auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *block_types[i]);
             if (!converted_field)
                 break;
             tuple_values[i] = std::move(*converted_field);
@@ -147,20 +157,28 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const
     if (lhs_type_depth == rhs_type_depth)
     {
         /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc.
         Array array{value};
-        result_block = createBlockFromCollection(array, set_element_types, transform_null_in);
+        DataTypes value_types{value_type};
+        result_block = createBlockFromCollection(array, value_types, set_element_types, transform_null_in);
     }
     else if (lhs_type_depth + 1 == rhs_type_depth)
     {
         /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4))
         WhichDataType rhs_which_type(value_type);

         if (rhs_which_type.isArray())
-            result_block = createBlockFromCollection(value.get<const Array &>(), set_element_types, transform_null_in);
+        {
+            const DataTypeArray * value_array_type = assert_cast<const DataTypeArray *>(value_type.get());
+            size_t value_array_size = value.get<const Array &>().size();
+            DataTypes value_types(value_array_size, value_array_type->getNestedType());
+            result_block = createBlockFromCollection(value.get<const Array &>(), value_types, set_element_types, transform_null_in);
+        }
         else if (rhs_which_type.isTuple())
-            result_block = createBlockFromCollection(value.get<const Tuple &>(), set_element_types, transform_null_in);
+        {
+            const DataTypeTuple * value_tuple_type = assert_cast<const DataTypeTuple *>(value_type.get());
+            const DataTypes & value_types = value_tuple_type->getElements();
+            result_block = createBlockFromCollection(value.get<const Tuple &>(), value_types, set_element_types, transform_null_in);
+        }
         else
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                 "Unsupported type at the right-side of IN. Expected Array or Tuple. Actual {}",

View File

@@ -44,13 +44,12 @@
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTColumnDeclaration.h>
 #include <Parsers/ASTFunction.h>
-#include <Parsers/Kusto/ParserKQLStatement.h>
 #include <Parsers/PRQL/ParserPRQLQuery.h>
+#include <Parsers/Kusto/ParserKQLStatement.h>
 #include <Parsers/Kusto/parseKQLQuery.h>
 #include <Processors/Formats/Impl/NullFormat.h>
 #include <Processors/Formats/IInputFormat.h>
-#include <Processors/Formats/IOutputFormat.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
 #include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>

View File

@@ -255,6 +255,17 @@ void HedgedConnections::sendCancel()
     if (!sent_query || cancelled)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled.");

+    /// All hedged connections should be stopped, since otherwise before the
+    /// HedgedConnectionsFactory will be destroyed (that will happen from
+    /// QueryPipeline dtor) they could still do some work.
+    /// And not only this does not make sense, but it also could lead to
+    /// use-after-free of the current_thread, since the thread from which they
+    /// had been created differs from the thread where the dtor of
+    /// QueryPipeline will be called and the initial thread could be already
+    /// destroyed (especially when the system is under pressure).
+    if (hedged_connections_factory.hasEventsInProcess())
+        hedged_connections_factory.stopChoosingReplicas();
+
     cancelled = true;

     for (auto & offset_status : offset_states)

View File

@ -11,10 +11,10 @@
#include <Poco/Util/XMLConfiguration.h> #include <Poco/Util/XMLConfiguration.h>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <boost/intrusive/list.hpp>
#include <chrono> #include <chrono>
#include <deque> #include <deque>
#include <queue>
#include <algorithm> #include <algorithm>
#include <functional> #include <functional>
#include <memory> #include <memory>
@ -30,6 +30,8 @@ namespace ErrorCodes
} }
class ISchedulerNode; class ISchedulerNode;
class EventQueue;
using EventId = UInt64;
inline const Poco::Util::AbstractConfiguration & emptyConfig() inline const Poco::Util::AbstractConfiguration & emptyConfig()
{ {
@ -82,6 +84,115 @@ struct SchedulerNodeInfo
} }
}; };
/*
* Node of hierarchy for scheduling requests for resource. Base class for all
* kinds of scheduling elements (queues, policies, constraints and schedulers).
*
* Root node is a scheduler, which has its own thread to dequeue requests,
* execute requests (see ResourceRequest) and process events in a thread-safe manner.
* Immediate children of the scheduler represent independent resources.
* Each resource has its own hierarchy to achieve the required scheduling policies.
* Non-leaf nodes do not hold requests, but keep scheduling state
* (e.g. consumption history, number of in-flight requests, etc).
* Leaves of the hierarchy are queues capable of holding pending requests.
*
* scheduler (SchedulerRoot)
* / \
* constraint constraint (SemaphoreConstraint)
* | |
* policy policy (PriorityPolicy)
* / \ / \
* q1 q2 q3 q4 (FifoQueue)
*
* Dequeueing a request from an inner node will dequeue a request from one of the active leaf-queues in its subtree.
* A node is considered to be active iff:
* - it has at least one pending request in one of the leaves of its subtree;
* - and enforced constraints, if any, are satisfied
* (e.g. the number of concurrent requests is not greater than some limit).
*
* All methods must be called only from scheduler thread for thread-safety.
*/
class ISchedulerNode : public boost::intrusive::list_base_hook<>, private boost::noncopyable
{
public:
explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: event_queue(event_queue_)
, info(config, config_prefix)
{}
virtual ~ISchedulerNode() = default;
/// Checks if two nodes configuration is equal
virtual bool equals(ISchedulerNode * other)
{
return info.equals(other->info);
}
/// Attach new child
virtual void attachChild(const std::shared_ptr<ISchedulerNode> & child) = 0;
/// Detach and destroy child
virtual void removeChild(ISchedulerNode * child) = 0;
/// Get attached child by name
virtual ISchedulerNode * getChild(const String & child_name) = 0;
/// Activation of child due to the first pending request
/// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root
virtual void activateChild(ISchedulerNode * child) = 0;
/// Returns true iff node is active
virtual bool isActive() = 0;
/// Returns number of active children
virtual size_t activeChildren() = 0;
/// Returns the first request to be executed as the first component of resulting pair.
/// The second pair component is `true` iff node is still active after dequeueing.
virtual std::pair<ResourceRequest *, bool> dequeueRequest() = 0;
/// Returns full path string using names of every parent
String getPath()
{
String result;
ISchedulerNode * ptr = this;
while (ptr->parent)
{
result = "/" + ptr->basename + result;
ptr = ptr->parent;
}
return result.empty() ? "/" : result;
}
/// Attach to a parent (used by attachChild)
virtual void setParent(ISchedulerNode * parent_)
{
parent = parent_;
}
protected:
/// Notify parents about the first pending request or constraint becoming satisfied.
/// Postponed to be handled in scheduler thread, so it is intended to be called from outside.
void scheduleActivation();
public:
EventQueue * const event_queue;
String basename;
SchedulerNodeInfo info;
ISchedulerNode * parent = nullptr;
EventId activation_event_id = 0; // Valid for `ISchedulerNode` placed in EventQueue::activations
/// Introspection
std::atomic<UInt64> dequeued_requests{0};
std::atomic<UInt64> canceled_requests{0};
std::atomic<ResourceCost> dequeued_cost{0};
std::atomic<ResourceCost> canceled_cost{0};
std::atomic<UInt64> busy_periods{0};
};
using SchedulerNodePtr = std::shared_ptr<ISchedulerNode>;
/* /*
* Simple waitable thread-safe FIFO task queue. * Simple waitable thread-safe FIFO task queue.
* Intended to hold postponed events for later handling (usually by scheduler thread). * Intended to hold postponed events for later handling (usually by scheduler thread).
@ -89,57 +200,70 @@ struct SchedulerNodeInfo
class EventQueue class EventQueue
{ {
public: public:
using Event = std::function<void()>; using Task = std::function<void()>;
static constexpr EventId not_postponed = 0;
using TimePoint = std::chrono::system_clock::time_point; using TimePoint = std::chrono::system_clock::time_point;
using Duration = std::chrono::system_clock::duration; using Duration = std::chrono::system_clock::duration;
static constexpr UInt64 not_postponed = 0;
struct Event
{
const EventId event_id;
Task task;
Event(EventId event_id_, Task && task_)
: event_id(event_id_)
, task(std::move(task_))
{}
};
struct Postponed struct Postponed
{ {
TimePoint key; TimePoint key;
UInt64 id; // for canceling EventId event_id; // for canceling
std::unique_ptr<Event> event; std::unique_ptr<Task> task;
Postponed(TimePoint key_, UInt64 id_, Event && event_) Postponed(TimePoint key_, EventId event_id_, Task && task_)
: key(key_) : key(key_)
, id(id_) , event_id(event_id_)
, event(std::make_unique<Event>(std::move(event_))) , task(std::make_unique<Task>(std::move(task_)))
{} {}
bool operator<(const Postponed & rhs) const bool operator<(const Postponed & rhs) const
{ {
return std::tie(key, id) > std::tie(rhs.key, rhs.id); // reversed for min-heap return std::tie(key, event_id) > std::tie(rhs.key, rhs.event_id); // reversed for min-heap
} }
}; };
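The reversed `operator<` above is the standard trick for getting a min-heap out of `std::push_heap`/`std::pop_heap`, which otherwise maintain a max-heap; a standalone illustration:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

struct Item
{
    uint64_t key;
    uint64_t id;
    // Reversed comparison: the heap algorithms build a max-heap, so
    // inverting operator< puts the *smallest* (key, id) at front().
    bool operator<(const Item & rhs) const
    {
        return std::tie(key, id) > std::tie(rhs.key, rhs.id);
    }
};

int main()
{
    std::vector<Item> heap;
    for (uint64_t k : {200u, 150u, 175u})
    {
        heap.push_back({k, heap.size() + 1});
        std::push_heap(heap.begin(), heap.end());
    }
    while (!heap.empty())
    {
        std::printf("%llu\n", static_cast<unsigned long long>(heap.front().key));  // 150, 175, 200
        std::pop_heap(heap.begin(), heap.end());
        heap.pop_back();
    }
}
```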
/// Add an `event` to be processed after `until` time point. /// Add an `event` to be processed after `until` time point.
/// Returns a unique id for canceling. /// Returns a unique event id for canceling.
[[nodiscard]] UInt64 postpone(TimePoint until, Event && event) [[nodiscard]] EventId postpone(TimePoint until, Task && task)
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
if (postponed.empty() || until < postponed.front().key) if (postponed.empty() || until < postponed.front().key)
pending.notify_one(); pending.notify_one();
auto id = ++last_id; auto event_id = ++last_event_id;
postponed.emplace_back(until, id, std::move(event)); postponed.emplace_back(until, event_id, std::move(task));
std::push_heap(postponed.begin(), postponed.end()); std::push_heap(postponed.begin(), postponed.end());
return id; return event_id;
} }
/// Cancel a postponed event using its unique id. /// Cancel a postponed event using its unique id.
/// NOTE: Only postponed events can be canceled. /// NOTE: Only postponed events can be canceled.
/// NOTE: If you need to cancel enqueued event, consider doing your actions inside another enqueued /// NOTE: If you need to cancel enqueued event, consider doing your actions inside another enqueued
/// NOTE: event instead. This ensures that all previous events are processed. /// NOTE: event instead. This ensures that all previous events are processed.
bool cancelPostponed(UInt64 postponed_id) bool cancelPostponed(EventId postponed_event_id)
{ {
if (postponed_id == not_postponed) if (postponed_event_id == not_postponed)
return false; return false;
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
for (auto i = postponed.begin(), e = postponed.end(); i != e; ++i) for (auto i = postponed.begin(), e = postponed.end(); i != e; ++i)
{ {
if (i->id == postponed_id) if (i->event_id == postponed_event_id)
{ {
postponed.erase(i); postponed.erase(i);
// It is O(n), but we do not expect either big heaps or frequent cancels. So it is fine. // It is O(n), but we expect neither big heaps nor frequent cancels. So it is fine.
std::make_heap(postponed.begin(), postponed.end()); std::make_heap(postponed.begin(), postponed.end());
return true; return true;
} }
@ -148,11 +272,23 @@ public:
} }
/// Add an `event` for immediate processing /// Add an `event` for immediate processing
void enqueue(Event && event) void enqueue(Task && task)
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
bool was_empty = queue.empty(); bool was_empty = events.empty() && activations.empty();
queue.emplace_back(event); auto event_id = ++last_event_id;
events.emplace_back(event_id, std::move(task));
if (was_empty)
pending.notify_one();
}
/// Add an activation `event` for immediate processing. Activations use a separate queue for performance reasons.
void enqueueActivation(ISchedulerNode * node)
{
std::unique_lock lock{mutex};
bool was_empty = events.empty() && activations.empty();
node->activation_event_id = ++last_event_id;
activations.push_back(*node);
if (was_empty) if (was_empty)
pending.notify_one(); pending.notify_one();
} }
@ -163,7 +299,7 @@ public:
bool forceProcess() bool forceProcess()
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
if (!queue.empty()) if (!events.empty() || !activations.empty())
{ {
processQueue(std::move(lock)); processQueue(std::move(lock));
return true; return true;
@ -181,7 +317,7 @@ public:
bool tryProcess() bool tryProcess()
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
if (!queue.empty()) if (!events.empty() || !activations.empty())
{ {
processQueue(std::move(lock)); processQueue(std::move(lock));
return true; return true;
@ -205,7 +341,7 @@ public:
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
while (true) while (true)
{ {
if (!queue.empty()) if (!events.empty() || !activations.empty())
{ {
processQueue(std::move(lock)); processQueue(std::move(lock));
return; return;
@ -269,141 +405,63 @@ private:
void processQueue(std::unique_lock<std::mutex> && lock) void processQueue(std::unique_lock<std::mutex> && lock)
{ {
Event event = std::move(queue.front()); if (events.empty())
queue.pop_front(); return processActivation(std::move(lock));
if (activations.empty())
return processEvent(std::move(lock));
if (activations.front().activation_event_id < events.front().event_id)
return processActivation(std::move(lock));
else
return processEvent(std::move(lock));
}
void processActivation(std::unique_lock<std::mutex> && lock)
{
ISchedulerNode * node = &activations.front();
activations.pop_front();
node->activation_event_id = 0;
lock.unlock(); // do not hold queue mutex while processing events lock.unlock(); // do not hold queue mutex while processing events
event(); node->parent->activateChild(node);
}
void processEvent(std::unique_lock<std::mutex> && lock)
{
Task task = std::move(events.front().task);
events.pop_front();
lock.unlock(); // do not hold queue mutex while processing events
task();
} }
void processPostponed(std::unique_lock<std::mutex> && lock) void processPostponed(std::unique_lock<std::mutex> && lock)
{ {
Event event = std::move(*postponed.front().event); Task task = std::move(*postponed.front().task);
std::pop_heap(postponed.begin(), postponed.end()); std::pop_heap(postponed.begin(), postponed.end());
postponed.pop_back(); postponed.pop_back();
lock.unlock(); // do not hold queue mutex while processing events lock.unlock(); // do not hold queue mutex while processing events
event(); task();
} }
std::mutex mutex; std::mutex mutex;
std::condition_variable pending; std::condition_variable pending;
std::deque<Event> queue;
// `events` and `activations` logically represent one ordered queue. To preserve the common order we use `EventId`
// Activations are stored in a separate queue for performance reasons (mostly to avoid any allocations)
std::deque<Event> events;
boost::intrusive::list<ISchedulerNode> activations;
std::vector<Postponed> postponed; std::vector<Postponed> postponed;
UInt64 last_id = 0; EventId last_event_id = 0;
std::atomic<TimePoint> manual_time{TimePoint()}; // for tests only std::atomic<TimePoint> manual_time{TimePoint()}; // for tests only
}; };
/* inline void ISchedulerNode::scheduleActivation()
* Node of hierarchy for scheduling requests for resource. Base class for all
* kinds of scheduling elements (queues, policies, constraints and schedulers).
*
* Root node is a scheduler, which has it's thread to dequeue requests,
* execute requests (see ResourceRequest) and process events in a thread-safe manner.
* Immediate children of the scheduler represent independent resources.
* Each resource has it's own hierarchy to achieve required scheduling policies.
* Non-leaf nodes do not hold requests, but keep scheduling state
* (e.g. consumption history, amount of in-flight requests, etc).
* Leafs of hierarchy are queues capable of holding pending requests.
*
* scheduler (SchedulerRoot)
* / \
* constraint constraint (SemaphoreConstraint)
* | |
* policy policy (PriorityPolicy)
* / \ / \
* q1 q2 q3 q4 (FifoQueue)
*
* Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree.
* Node is considered to be active iff:
* - it has at least one pending request in one of leaves of it's subtree;
* - and enforced constraints, if any, are satisfied
* (e.g. amount of concurrent requests is not greater than some number).
*
* All methods must be called only from scheduler thread for thread-safety.
*/
class ISchedulerNode : private boost::noncopyable
{ {
public: if (likely(parent))
explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: event_queue(event_queue_)
, info(config, config_prefix)
{}
virtual ~ISchedulerNode() = default;
/// Checks if two nodes configuration is equal
virtual bool equals(ISchedulerNode * other)
{ {
return info.equals(other->info); // The same as `enqueue([this] { parent->activateChild(this); });` but faster
event_queue->enqueueActivation(this);
} }
}
/// Attach new child
virtual void attachChild(const std::shared_ptr<ISchedulerNode> & child) = 0;
/// Detach and destroy child
virtual void removeChild(ISchedulerNode * child) = 0;
/// Get attached child by name
virtual ISchedulerNode * getChild(const String & child_name) = 0;
/// Activation of child due to the first pending request
/// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root
virtual void activateChild(ISchedulerNode * child) = 0;
/// Returns true iff node is active
virtual bool isActive() = 0;
/// Returns number of active children
virtual size_t activeChildren() = 0;
/// Returns the first request to be executed as the first component of resulting pair.
/// The second pair component is `true` iff node is still active after dequeueing.
virtual std::pair<ResourceRequest *, bool> dequeueRequest() = 0;
/// Returns full path string using names of every parent
String getPath()
{
String result;
ISchedulerNode * ptr = this;
while (ptr->parent)
{
result = "/" + ptr->basename + result;
ptr = ptr->parent;
}
return result.empty() ? "/" : result;
}
/// Attach to a parent (used by attachChild)
virtual void setParent(ISchedulerNode * parent_)
{
parent = parent_;
}
protected:
/// Notify parents about the first pending request or constraint becoming satisfied.
/// Postponed to be handled in scheduler thread, so it is intended to be called from outside.
void scheduleActivation()
{
if (likely(parent))
{
event_queue->enqueue([this] { parent->activateChild(this); });
}
}
public:
EventQueue * const event_queue;
String basename;
SchedulerNodeInfo info;
ISchedulerNode * parent = nullptr;
/// Introspection
std::atomic<UInt64> dequeued_requests{0};
std::atomic<UInt64> canceled_requests{0};
std::atomic<ResourceCost> dequeued_cost{0};
std::atomic<ResourceCost> canceled_cost{0};
std::atomic<UInt64> busy_periods{0};
};
using SchedulerNodePtr = std::shared_ptr<ISchedulerNode>;
} }
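A minimal model of the two-queue design above (plain `std::deque`s here; the real code keeps activations in a `boost::intrusive::list` to avoid allocations): ids drawn from a single counter let the consumer interleave both queues in their logical order.

```cpp
#include <cstdint>
#include <cstdio>
#include <deque>
#include <string>
#include <utility>

// Two physical queues, one logical order: ids come from one counter.
struct TwoQueues
{
    uint64_t last_id = 0;
    std::deque<std::pair<uint64_t, std::string>> events;
    std::deque<std::pair<uint64_t, std::string>> activations;

    void pushEvent(std::string s) { events.emplace_back(++last_id, std::move(s)); }
    void pushActivation(std::string s) { activations.emplace_back(++last_id, std::move(s)); }

    bool pop(std::string & out)
    {
        if (events.empty() && activations.empty())
            return false;
        // Take from whichever queue holds the entry with the smaller id.
        bool take_activation = events.empty()
            || (!activations.empty() && activations.front().first < events.front().first);
        auto & q = take_activation ? activations : events;
        out = std::move(q.front().second);
        q.pop_front();
        return true;
    }
};

int main()
{
    TwoQueues q;
    q.pushEvent("E1");
    q.pushActivation("A1");
    q.pushEvent("E2");
    q.pushActivation("A2");
    for (std::string s; q.pop(s);)
        std::printf("%s ", s.c_str());  // E1 A1 E2 A2, matching the Smoke test
    std::printf("\n");
}
```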

View File

@ -0,0 +1,143 @@
#include <chrono>
#include <gtest/gtest.h>
#include <Common/Scheduler/ISchedulerNode.h>
using namespace DB;
class FakeSchedulerNode : public ISchedulerNode
{
public:
explicit FakeSchedulerNode(String & log_, EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: ISchedulerNode(event_queue_, config, config_prefix)
, log(log_)
{}
void attachChild(const SchedulerNodePtr & child) override
{
log += " +" + child->basename;
}
void removeChild(ISchedulerNode * child) override
{
log += " -" + child->basename;
}
ISchedulerNode * getChild(const String & /* child_name */) override
{
return nullptr;
}
void activateChild(ISchedulerNode * child) override
{
log += " A" + child->basename;
}
bool isActive() override
{
return false;
}
size_t activeChildren() override
{
return 0;
}
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
log += " D";
return {nullptr, false};
}
private:
String & log;
};
struct QueueTest {
String log;
EventQueue event_queue;
FakeSchedulerNode root_node;
QueueTest()
: root_node(log, &event_queue)
{}
SchedulerNodePtr makeNode(const String & name)
{
auto node = std::make_shared<FakeSchedulerNode>(log, &event_queue);
node->basename = name;
node->setParent(&root_node);
return std::static_pointer_cast<ISchedulerNode>(node);
}
void process(EventQueue::TimePoint now, const String & expected_log, size_t limit = size_t(-1))
{
event_queue.setManualTime(now);
for (;limit > 0; limit--)
{
if (!event_queue.tryProcess())
break;
}
EXPECT_EQ(log, expected_log);
log.clear();
}
void activate(const SchedulerNodePtr & node)
{
event_queue.enqueueActivation(node.get());
}
void event(const String & text)
{
event_queue.enqueue([this, text] { log += " " + text; });
}
EventId postpone(EventQueue::TimePoint until, const String & text)
{
return event_queue.postpone(until, [this, text] { log += " " + text; });
}
void cancel(EventId event_id)
{
event_queue.cancelPostponed(event_id);
}
};
TEST(SchedulerEventQueue, Smoke)
{
QueueTest t;
using namespace std::chrono_literals;
EventQueue::TimePoint start = std::chrono::system_clock::now();
t.process(start, "", 0);
// Activations
auto node1 = t.makeNode("1");
auto node2 = t.makeNode("2");
t.activate(node2);
t.activate(node1);
t.process(start + 42s, " A2 A1");
// Events
t.event("E1");
t.event("E2");
t.process(start + 100s, " E1 E2");
// Postponed events
t.postpone(start + 200s, "P200");
auto p190 = t.postpone(start + 200s, "P190");
t.postpone(start + 150s, "P150");
t.postpone(start + 175s, "P175");
t.process(start + 180s, " P150 P175");
t.event("E3");
t.cancel(p190);
t.process(start + 300s, " E3 P200");
// Ordering of events and activations
t.event("E1");
t.activate(node1);
t.event("E2");
t.activate(node2);
t.process(start + 300s, " E1 A1 E2 A2");
}

View File

@ -5,8 +5,6 @@
#include <Common/Scheduler/Nodes/FairPolicy.h> #include <Common/Scheduler/Nodes/FairPolicy.h>
#include <Common/Scheduler/Nodes/ThrottlerConstraint.h> #include <Common/Scheduler/Nodes/ThrottlerConstraint.h>
#include "Common/Scheduler/ISchedulerNode.h"
#include "Common/Scheduler/ResourceRequest.h"
using namespace DB; using namespace DB;

View File

@ -85,9 +85,18 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_)
/// Write information about current server instance to the file. /// Write information about current server instance to the file.
WriteBufferFromFileDescriptor out(fd, 1024); WriteBufferFromFileDescriptor out(fd, 1024);
fill(out); try
/// Finalize here to avoid throwing exceptions in destructor. {
out.finalize(); fill(out);
/// Finalize here to avoid throwing exceptions in destructor.
out.finalize();
}
catch (...)
{
/// Finalize in case of exception to avoid throwing exceptions in destructor
out.finalize();
throw;
}
} }
catch (...) catch (...)
{ {
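The shape of the new StatusFile error handling, distilled (hypothetical `Out` stand-in): finalize on both the normal and the exceptional path, then rethrow, so the buffer's destructor never has to throw.

```cpp
#include <cstdio>
#include <stdexcept>

struct Out
{
    bool finalized = false;
    void write() { throw std::runtime_error("fill failed"); }
    void finalize() { finalized = true; }  // must happen before ~Out
    ~Out() { if (!finalized) std::fprintf(stderr, "dtor would have to throw!\n"); }
};

void writeStatus()
{
    Out out;
    try
    {
        out.write();
        out.finalize();  // normal path
    }
    catch (...)
    {
        out.finalize();  // exceptional path: finalize, then propagate
        throw;
    }
}

int main()
{
    try { writeStatus(); }
    catch (const std::exception & e) { std::printf("caught: %s\n", e.what()); }
}
```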

View File

@ -609,7 +609,10 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
uncommitted_auth.pop_front(); uncommitted_auth.pop_front();
if (uncommitted_auth.empty()) if (uncommitted_auth.empty())
session_and_auth.erase(add_auth->session_id); session_and_auth.erase(add_auth->session_id);
}
else if (auto * close_session = std::get_if<CloseSessionDelta>(&front_delta.operation))
{
closed_sessions.erase(close_session->session_id);
} }
deltas.pop_front(); deltas.pop_front();
@ -682,6 +685,10 @@ void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid)
session_and_auth.erase(add_auth->session_id); session_and_auth.erase(add_auth->session_id);
} }
} }
else if (auto * close_session = std::get_if<CloseSessionDelta>(&delta_it->operation))
{
closed_sessions.erase(close_session->session_id);
}
} }
if (delta_it == deltas.rend()) if (delta_it == deltas.rend())
@ -878,6 +885,10 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid)
session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id));
return Coordination::Error::ZOK; return Coordination::Error::ZOK;
} }
else if constexpr (std::same_as<DeltaType, KeeperStorage::CloseSessionDelta>)
{
return Coordination::Error::ZOK;
}
else else
{ {
// shouldn't be called in any process functions // shouldn't be called in any process functions
@ -2366,12 +2377,15 @@ void KeeperStorage::preprocessRequest(
ephemerals.erase(session_ephemerals); ephemerals.erase(session_ephemerals);
} }
new_deltas.emplace_back(transaction.zxid, CloseSessionDelta{session_id});
uncommitted_state.closed_sessions.insert(session_id);
new_digest = calculateNodesDigest(new_digest, new_deltas); new_digest = calculateNodesDigest(new_digest, new_deltas);
return; return;
} }
if (check_acl && !request_processor->checkAuth(*this, session_id, false)) if ((check_acl && !request_processor->checkAuth(*this, session_id, false)) ||
uncommitted_state.closed_sessions.contains(session_id)) // Is session closed but not committed yet
{ {
uncommitted_state.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH); uncommitted_state.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH);
return; return;

View File

@ -314,8 +314,13 @@ public:
AuthID auth_id; AuthID auth_id;
}; };
struct CloseSessionDelta
{
int64_t session_id;
};
using Operation = std:: using Operation = std::
variant<CreateNodeDelta, RemoveNodeDelta, UpdateNodeDelta, SetACLDelta, AddAuthDelta, ErrorDelta, SubDeltaEnd, FailedMultiDelta>; variant<CreateNodeDelta, RemoveNodeDelta, UpdateNodeDelta, SetACLDelta, AddAuthDelta, ErrorDelta, SubDeltaEnd, FailedMultiDelta, CloseSessionDelta>;
struct Delta struct Delta
{ {
@ -351,6 +356,7 @@ public:
std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const; std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const;
std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth; std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth;
std::unordered_set<int64_t> closed_sessions;
struct UncommittedNode struct UncommittedNode
{ {
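A compact sketch of the delta-dispatch pattern extended above: each operation kind is a small struct inside a `std::variant`, and rollback probes for the kinds it must undo with `std::get_if` (simplified stand-in structs, not the full Keeper delta set).

```cpp
#include <cstdint>
#include <cstdio>
#include <deque>
#include <unordered_set>
#include <variant>

struct AddAuthDelta { int64_t session_id; };
struct CloseSessionDelta { int64_t session_id; };
using Operation = std::variant<AddAuthDelta, CloseSessionDelta>;

int main()
{
    std::deque<Operation> deltas{AddAuthDelta{1}, CloseSessionDelta{1}};
    std::unordered_set<int64_t> closed_sessions{1};

    // Rollback-style walk: undo the side effect of each uncommitted delta kind.
    for (auto & delta : deltas)
    {
        if (const auto * close_session = std::get_if<CloseSessionDelta>(&delta))
            closed_sessions.erase(close_session->session_id);
    }
    std::printf("closed sessions left: %zu\n", closed_sessions.size());  // 0
}
```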

View File

@ -2019,6 +2019,67 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte
EXPECT_EQ(acls[0].permissions, 31); EXPECT_EQ(acls[0].permissions, 31);
} }
TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted)
{
using namespace Coordination;
using namespace DB;
ChangelogDirTest snapshots("./snapshots");
setSnapshotDirectory("./snapshots");
ResponsesQueue queue(std::numeric_limits<size_t>::max());
SnapshotsQueue snapshots_queue{1};
int64_t session_id = 1;
size_t term = 0;
auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, keeper_context, nullptr);
state_machine->init();
auto & storage = state_machine->getStorageUnsafe();
const auto & uncommitted_state = storage.uncommitted_state;
// Create first node for the session
String node_path_1 = "/node_1";
std::shared_ptr<ZooKeeperCreateRequest> create_req_1 = std::make_shared<ZooKeeperCreateRequest>();
create_req_1->path = node_path_1;
auto create_entry_1 = getLogEntryFromZKRequest(term, session_id, state_machine->getNextZxid(), create_req_1);
state_machine->pre_commit(1, create_entry_1->get_buf());
EXPECT_TRUE(uncommitted_state.nodes.contains(node_path_1));
state_machine->commit(1, create_entry_1->get_buf());
EXPECT_TRUE(storage.container.contains(node_path_1));
// Close session
std::shared_ptr<ZooKeeperCloseRequest> close_req = std::make_shared<ZooKeeperCloseRequest>();
auto close_entry = getLogEntryFromZKRequest(term, session_id, state_machine->getNextZxid(), close_req);
// Pre-commit close session
state_machine->pre_commit(2, close_entry->get_buf());
// Try to create second node after close session is pre-committed
String node_path_2 = "/node_2";
std::shared_ptr<ZooKeeperCreateRequest> create_req_2 = std::make_shared<ZooKeeperCreateRequest>();
create_req_2->path = node_path_2;
auto create_entry_2 = getLogEntryFromZKRequest(term, session_id, state_machine->getNextZxid(), create_req_2);
// Pre-commit creating second node
state_machine->pre_commit(3, create_entry_2->get_buf());
// Second node wasn't created
EXPECT_FALSE(uncommitted_state.nodes.contains(node_path_2));
// Rollback pre-committed closing session
state_machine->rollback(3, create_entry_2->get_buf());
state_machine->rollback(2, close_entry->get_buf());
// Pre-commit creating second node
state_machine->pre_commit(2, create_entry_2->get_buf());
// Now second node was created
EXPECT_TRUE(uncommitted_state.nodes.contains(node_path_2));
state_machine->commit(2, create_entry_2->get_buf());
EXPECT_TRUE(storage.container.contains(node_path_1));
EXPECT_TRUE(storage.container.contains(node_path_2));
}
TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted)
{ {
using namespace Coordination; using namespace Coordination;

View File

@ -146,6 +146,8 @@ namespace DB
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -1144,7 +1144,9 @@ class IColumn;
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \ M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \
M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \ M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \
M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \
M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \
M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum number of rows for which column names are displayed in the footer. Default: 50.", 0) \
M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \
M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \
M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \

View File

@ -108,6 +108,8 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"},
{"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"},
{"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"},
{"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."},
{"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."},
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},

View File

@ -171,7 +171,7 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
} }
/// If we started to read a new column, reinitialize variant column in deserialization state. /// If we started to read a new column, reinitialize variant column in deserialization state.
if (!variant_element_state->variant || result_column->empty()) if (!variant_element_state->variant || mutable_column->empty())
{ {
variant_element_state->variant = mutable_column->cloneEmpty(); variant_element_state->variant = mutable_column->cloneEmpty();

View File

@ -175,8 +175,7 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
if (!mask_filled) if (!mask_filled)
(*default_mask)[requested_key_index] = 1; (*default_mask)[requested_key_index] = 1;
Field value{}; result_column->insertDefault();
result_column->insert(value);
} }
else else
{ {

View File

@ -181,6 +181,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.pretty.highlight_digit_groups = settings.output_format_pretty_highlight_digit_groups; format_settings.pretty.highlight_digit_groups = settings.output_format_pretty_highlight_digit_groups;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers; format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.pretty.output_format_pretty_single_large_number_tip_threshold = settings.output_format_pretty_single_large_number_tip_threshold; format_settings.pretty.output_format_pretty_single_large_number_tip_threshold = settings.output_format_pretty_single_large_number_tip_threshold;
format_settings.pretty.output_format_pretty_display_footer_column_names = settings.output_format_pretty_display_footer_column_names;
format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows = settings.output_format_pretty_display_footer_column_names_min_rows;
format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers; format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers;
format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers; format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers;
format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference; format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference;

View File

@ -289,6 +289,8 @@ struct FormatSettings
bool output_format_pretty_row_numbers = false; bool output_format_pretty_row_numbers = false;
UInt64 output_format_pretty_single_large_number_tip_threshold = 1'000'000; UInt64 output_format_pretty_single_large_number_tip_threshold = 1'000'000;
UInt64 output_format_pretty_display_footer_column_names = 1;
UInt64 output_format_pretty_display_footer_column_names_min_rows = 50;
enum class Charset : uint8_t enum class Charset : uint8_t
{ {

View File

@ -113,6 +113,36 @@ struct ByteHammingDistanceImpl
} }
}; };
void parseUTF8String(const char * __restrict data, size_t size, std::function<void(UInt32)> utf8_consumer, std::function<void(unsigned char)> ascii_consumer = nullptr)
{
const char * end = data + size;
while (data < end)
{
size_t len = UTF8::seqLength(*data);
if (len == 1)
{
if (ascii_consumer)
ascii_consumer(static_cast<unsigned char>(*data));
else
utf8_consumer(static_cast<UInt32>(*data));
++data;
}
else
{
auto code_point = UTF8::convertUTF8ToCodePoint(data, end - data);
if (code_point.has_value())
{
utf8_consumer(code_point.value());
data += len;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(data, end - data));
}
}
}
}
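A simplified, self-contained re-implementation of the consumer-callback walk above (the real code relies on `UTF8::seqLength` and `UTF8::convertUTF8ToCodePoint`; validation is omitted here), showing how ASCII bytes and multi-byte sequences are routed to different consumers:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <string_view>

static std::size_t seqLength(unsigned char first)
{
    if (first < 0x80) return 1;
    if ((first & 0xE0) == 0xC0) return 2;
    if ((first & 0xF0) == 0xE0) return 3;
    return 4;
}

void walkUTF8(std::string_view s,
              const std::function<void(uint32_t)> & utf8_consumer,
              const std::function<void(unsigned char)> & ascii_consumer)
{
    for (std::size_t i = 0; i < s.size();)
    {
        const std::size_t len = seqLength(static_cast<unsigned char>(s[i]));
        if (len == 1)
            ascii_consumer(static_cast<unsigned char>(s[i]));
        else
        {
            // Decode just enough for the demo; no sequence validation.
            uint32_t cp = static_cast<unsigned char>(s[i]) & (0x7F >> len);
            for (std::size_t k = 1; k < len && i + k < s.size(); ++k)
                cp = (cp << 6) | (static_cast<unsigned char>(s[i + k]) & 0x3F);
            utf8_consumer(cp);
        }
        i += len;
    }
}

int main()
{
    std::size_t ascii = 0, multibyte = 0;
    walkUTF8("aß∆", [&](uint32_t) { ++multibyte; }, [&](unsigned char) { ++ascii; });
    std::printf("ascii=%zu multibyte=%zu\n", ascii, multibyte);  // ascii=1 multibyte=2
}
```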
template <bool is_utf8> template <bool is_utf8>
struct ByteJaccardIndexImpl struct ByteJaccardIndexImpl
{ {
@ -138,57 +168,28 @@ struct ByteJaccardIndexImpl
haystack_set.fill(0); haystack_set.fill(0);
needle_set.fill(0); needle_set.fill(0);
while (haystack < haystack_end) if constexpr (is_utf8)
{ {
size_t len = 1; parseUTF8String(
if constexpr (is_utf8) haystack,
len = UTF8::seqLength(*haystack); haystack_size,
[&](UInt32 data) { haystack_utf8_set.insert(data); },
if (len == 1) [&](unsigned char data) { haystack_set[data] = 1; });
parseUTF8String(
needle, needle_size, [&](UInt32 data) { needle_utf8_set.insert(data); }, [&](unsigned char data) { needle_set[data] = 1; });
}
else
{
while (haystack < haystack_end)
{ {
haystack_set[static_cast<unsigned char>(*haystack)] = 1; haystack_set[static_cast<unsigned char>(*haystack)] = 1;
++haystack; ++haystack;
} }
else while (needle < needle_end)
{
auto code_point = UTF8::convertUTF8ToCodePoint(haystack, haystack_end - haystack);
if (code_point.has_value())
{
haystack_utf8_set.insert(code_point.value());
haystack += len;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(haystack, haystack_end - haystack));
}
}
}
while (needle < needle_end)
{
size_t len = 1;
if constexpr (is_utf8)
len = UTF8::seqLength(*needle);
if (len == 1)
{ {
needle_set[static_cast<unsigned char>(*needle)] = 1; needle_set[static_cast<unsigned char>(*needle)] = 1;
++needle; ++needle;
} }
else
{
auto code_point = UTF8::convertUTF8ToCodePoint(needle, needle_end - needle);
if (code_point.has_value())
{
needle_utf8_set.insert(code_point.value());
needle += len;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(needle, needle_end - needle));
}
}
} }
UInt8 intersection = 0; UInt8 intersection = 0;
@ -226,6 +227,7 @@ struct ByteJaccardIndexImpl
static constexpr size_t max_string_size = 1u << 16; static constexpr size_t max_string_size = 1u << 16;
template<bool is_utf8>
struct ByteEditDistanceImpl struct ByteEditDistanceImpl
{ {
using ResultType = UInt64; using ResultType = UInt64;
@ -242,6 +244,16 @@ struct ByteEditDistanceImpl
ErrorCodes::TOO_LARGE_STRING_SIZE, ErrorCodes::TOO_LARGE_STRING_SIZE,
"The string size is too big for function editDistance, should be at most {}", max_string_size); "The string size is too big for function editDistance, should be at most {}", max_string_size);
PaddedPODArray<UInt32> haystack_utf8;
PaddedPODArray<UInt32> needle_utf8;
if constexpr (is_utf8)
{
parseUTF8String(haystack, haystack_size, [&](UInt32 data) { haystack_utf8.push_back(data); });
parseUTF8String(needle, needle_size, [&](UInt32 data) { needle_utf8.push_back(data); });
haystack_size = haystack_utf8.size();
needle_size = needle_utf8.size();
}
PaddedPODArray<ResultType> distances0(haystack_size + 1, 0); PaddedPODArray<ResultType> distances0(haystack_size + 1, 0);
PaddedPODArray<ResultType> distances1(haystack_size + 1, 0); PaddedPODArray<ResultType> distances1(haystack_size + 1, 0);
@ -261,9 +273,16 @@ struct ByteEditDistanceImpl
insertion = distances1[pos_haystack] + 1; insertion = distances1[pos_haystack] + 1;
substitution = distances0[pos_haystack]; substitution = distances0[pos_haystack];
if (*(needle + pos_needle) != *(haystack + pos_haystack)) if constexpr (is_utf8)
substitution += 1; {
if (needle_utf8[pos_needle] != haystack_utf8[pos_haystack])
substitution += 1;
}
else
{
if (*(needle + pos_needle) != *(haystack + pos_haystack))
substitution += 1;
}
distances1[pos_haystack + 1] = std::min(deletion, std::min(substitution, insertion)); distances1[pos_haystack + 1] = std::min(deletion, std::min(substitution, insertion));
} }
distances0.swap(distances1); distances0.swap(distances1);
@ -457,7 +476,12 @@ struct NameEditDistance
{ {
static constexpr auto name = "editDistance"; static constexpr auto name = "editDistance";
}; };
using FunctionEditDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl>, NameEditDistance>; using FunctionEditDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl<false>>, NameEditDistance>;
struct NameEditDistanceUTF8
{
static constexpr auto name = "editDistanceUTF8";
};
using FunctionEditDistanceUTF8 = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl<true>>, NameEditDistanceUTF8>;
struct NameDamerauLevenshteinDistance struct NameDamerauLevenshteinDistance
{ {
@ -499,6 +523,10 @@ REGISTER_FUNCTION(StringDistance)
FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"});
factory.registerAlias("levenshteinDistance", NameEditDistance::name); factory.registerAlias("levenshteinDistance", NameEditDistance::name);
factory.registerFunction<FunctionEditDistanceUTF8>(
FunctionDocumentation{.description = R"(Calculates the edit distance between two UTF8 strings.)"});
factory.registerAlias("levenshteinDistanceUTF8", NameEditDistanceUTF8::name);
factory.registerFunction<FunctionDamerauLevenshteinDistance>( factory.registerFunction<FunctionDamerauLevenshteinDistance>(
FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"}); FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"});

View File

@ -535,7 +535,7 @@ void PocoHTTPClient::makeRequestInternalImpl(
const static std::string_view needle = "<Error>"; const static std::string_view needle = "<Error>";
if (auto it = std::search(response_string.begin(), response_string.end(), std::default_searcher(needle.begin(), needle.end())); it != response_string.end()) if (auto it = std::search(response_string.begin(), response_string.end(), std::default_searcher(needle.begin(), needle.end())); it != response_string.end())
{ {
LOG_WARNING(log, "Response for request contain <Error> tag in body, settings internal server error (500 code)"); LOG_WARNING(log, "Response for the request contains an <Error> tag in the body, will treat it as an internal server error (code 500)");
response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR); response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR);
addMetric(request, S3MetricType::Errors); addMetric(request, S3MetricType::Errors);

View File

@ -77,7 +77,15 @@ WriteBufferFromFile::~WriteBufferFromFile()
if (fd < 0) if (fd < 0)
return; return;
finalize(); try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
int err = ::close(fd); int err = ::close(fd);
/// Everything except for EBADF should be ignored in dtor, since all of /// Everything except for EBADF should be ignored in dtor, since all of
/// others (EINTR/EIO/ENOSPC/EDQUOT) could be possible during writing to /// others (EINTR/EIO/ENOSPC/EDQUOT) could be possible during writing to

View File

@ -105,7 +105,14 @@ WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor()
{ {
finalize(); try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
} }
void WriteBufferFromFileDescriptor::finalizeImpl() void WriteBufferFromFileDescriptor::finalizeImpl()
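Both write-buffer destructors above now follow the same rule, distilled here (stand-in class, stand-in logging): a destructor must not throw, so `finalize()` is attempted and any failure is logged and swallowed.

```cpp
#include <cstdio>
#include <stdexcept>

struct Buffer
{
    bool finalized = false;

    void finalize()
    {
        if (finalized)
            return;
        finalized = true;
        throw std::runtime_error("flush failed");  // e.g. ENOSPC on write
    }

    ~Buffer()
    {
        try
        {
            finalize();  // best effort: callers should have finalized already
        }
        catch (...)
        {
            // Throwing from a destructor during stack unwinding would call
            // std::terminate, so log and swallow instead.
            std::fprintf(stderr, "error in ~Buffer, suppressed\n");
        }
    }
};

int main()
{
    Buffer b;  // dtor logs instead of terminating
}
```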

View File

@ -4,6 +4,7 @@
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/FieldVisitorsAccurateComparison.h> #include <Common/FieldVisitorsAccurateComparison.h>
#include <Common/checkStackSize.h> #include <Common/checkStackSize.h>
#include <Common/assert_cast.h>
#include <Core/ColumnNumbers.h> #include <Core/ColumnNumbers.h>
#include <Core/ColumnWithTypeAndName.h> #include <Core/ColumnWithTypeAndName.h>
@ -102,7 +103,7 @@ static size_t getTypeDepth(const DataTypePtr & type)
/// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty. /// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty.
/// We cannot add to the set values that don't represent any possible value of the filtered column's type. /// We cannot add to the set values that don't represent any possible value of the filtered column's type.
template<typename Collection> template<typename Collection>
static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, bool transform_null_in) static Block createBlockFromCollection(const Collection & collection, const DataTypes & value_types, const DataTypes & types, bool transform_null_in)
{ {
size_t columns_num = types.size(); size_t columns_num = types.size();
MutableColumns columns(columns_num); MutableColumns columns(columns_num);
@ -113,11 +114,12 @@ static Block createBlockFromCollection(const Collection & collection, const Data
} }
Row tuple_values; Row tuple_values;
for (const auto & value : collection) for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index)
{ {
const auto& value = collection[collection_index];
if (columns_num == 1) if (columns_num == 1)
{ {
auto field = convertFieldToTypeStrict(value, *types[0]); auto field = convertFieldToTypeStrict(value, *value_types[collection_index], *types[0]);
bool need_insert_null = transform_null_in && types[0]->isNullable(); bool need_insert_null = transform_null_in && types[0]->isNullable();
if (field && (!field->isNull() || need_insert_null)) if (field && (!field->isNull() || need_insert_null))
columns[0]->insert(*field); columns[0]->insert(*field);
@ -130,7 +132,6 @@ static Block createBlockFromCollection(const Collection & collection, const Data
const auto & tuple = value.template get<const Tuple &>(); const auto & tuple = value.template get<const Tuple &>();
size_t tuple_size = tuple.size(); size_t tuple_size = tuple.size();
if (tuple_size != columns_num) if (tuple_size != columns_num)
throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}",
tuple_size, columns_num); tuple_size, columns_num);
@ -138,10 +139,13 @@ static Block createBlockFromCollection(const Collection & collection, const Data
if (tuple_values.empty()) if (tuple_values.empty())
tuple_values.resize(tuple_size); tuple_values.resize(tuple_size);
const DataTypePtr & value_type = value_types[collection_index];
const DataTypes & tuple_value_type = typeid_cast<const DataTypeTuple *>(value_type.get())->getElements();
size_t i = 0; size_t i = 0;
for (; i < tuple_size; ++i) for (; i < tuple_size; ++i)
{ {
auto converted_field = convertFieldToTypeStrict(tuple[i], *types[i]); auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *types[i]);
if (!converted_field) if (!converted_field)
break; break;
tuple_values[i] = std::move(*converted_field); tuple_values[i] = std::move(*converted_field);
@ -317,16 +321,25 @@ Block createBlockForSet(
if (left_type_depth == right_type_depth) if (left_type_depth == right_type_depth)
{ {
Array array{right_arg_value}; Array array{right_arg_value};
block = createBlockFromCollection(array, set_element_types, tranform_null_in); DataTypes value_types{right_arg_type};
block = createBlockFromCollection(array, value_types, set_element_types, tranform_null_in);
} }
/// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc.
else if (left_type_depth + 1 == right_type_depth) else if (left_type_depth + 1 == right_type_depth)
{ {
auto type_index = right_arg_type->getTypeId(); auto type_index = right_arg_type->getTypeId();
if (type_index == TypeIndex::Tuple) if (type_index == TypeIndex::Tuple)
block = createBlockFromCollection(right_arg_value.get<const Tuple &>(), set_element_types, tranform_null_in); {
const DataTypes & value_types = assert_cast<const DataTypeTuple *>(right_arg_type.get())->getElements();
block = createBlockFromCollection(right_arg_value.get<const Tuple &>(), value_types, set_element_types, tranform_null_in);
}
else if (type_index == TypeIndex::Array) else if (type_index == TypeIndex::Array)
block = createBlockFromCollection(right_arg_value.get<const Array &>(), set_element_types, tranform_null_in); {
const auto* right_arg_array_type = assert_cast<const DataTypeArray *>(right_arg_type.get());
size_t right_arg_array_size = right_arg_value.get<const Array &>().size();
DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType());
block = createBlockFromCollection(right_arg_value.get<const Array &>(), value_types, set_element_types, tranform_null_in);
}
else else
throw_unsupported_type(right_arg_type); throw_unsupported_type(right_arg_type);
} }
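A toy model of the "strict" conversion rule quoted in the comment above (33.33 does not survive conversion to a scale-1 decimal, so it is dropped from the set); hypothetical helper, decimal modeled as a scaled integer:

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <optional>

// Sketch of strict conversion: accept the converted value only if it
// round-trips, i.e. still represents the original literal exactly.
std::optional<int64_t> convertToDecimalStrict(double value, int scale)
{
    const double factor = std::pow(10.0, scale);
    const int64_t scaled = static_cast<int64_t>(std::llround(value * factor));
    if (static_cast<double>(scaled) / factor != value)
        return std::nullopt;  // precision would be lost: drop it from the set
    return scaled;
}

int main()
{
    // Column type is Decimal(_, 1); the set literal 33.33 cannot be represented.
    auto ok = convertToDecimalStrict(33.3, 1);    // -> 333
    auto bad = convertToDecimalStrict(33.33, 1);  // -> nullopt
    std::printf("%d %d\n", ok.has_value(), bad.has_value());  // 1 0
}
```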

View File

@ -281,6 +281,8 @@ struct ContextSharedPart : boost::noncopyable
String default_profile_name; /// Default profile name used for default values. String default_profile_name; /// Default profile name used for default values.
String system_profile_name; /// Profile used by system processes String system_profile_name; /// Profile used by system processes
String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
String merge_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all merges
String mutation_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all mutations
std::unique_ptr<AccessControl> access_control TSA_GUARDED_BY(mutex); std::unique_ptr<AccessControl> access_control TSA_GUARDED_BY(mutex);
mutable OnceFlag resource_manager_initialized; mutable OnceFlag resource_manager_initialized;
mutable ResourceManagerPtr resource_manager; mutable ResourceManagerPtr resource_manager;
@ -1561,11 +1563,36 @@ ResourceManagerPtr Context::getResourceManager() const
ClassifierPtr Context::getWorkloadClassifier() const ClassifierPtr Context::getWorkloadClassifier() const
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
// NOTE: Workload cannot be changed after query start, and getWorkloadClassifier() should not be called before proper `workload` is set
if (!classifier) if (!classifier)
classifier = getResourceManager()->acquire(getSettingsRef().workload); classifier = getResourceManager()->acquire(getSettingsRef().workload);
return classifier; return classifier;
} }
String Context::getMergeWorkload() const
{
SharedLockGuard lock(shared->mutex);
return shared->merge_workload;
}
void Context::setMergeWorkload(const String & value)
{
std::lock_guard lock(shared->mutex);
shared->merge_workload = value;
}
String Context::getMutationWorkload() const
{
SharedLockGuard lock(shared->mutex);
return shared->mutation_workload;
}
void Context::setMutationWorkload(const String & value)
{
std::lock_guard lock(shared->mutex);
shared->mutation_workload = value;
}
Scalars Context::getScalars() const Scalars Context::getScalars() const
{ {
@ -2513,6 +2540,20 @@ void Context::makeQueryContext()
backups_query_throttler.reset(); backups_query_throttler.reset();
} }
void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings)
{
makeQueryContext();
classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes
settings.workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload;
}
void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings)
{
makeQueryContext();
classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes
settings.workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload;
}
void Context::makeSessionContext() void Context::makeSessionContext()
{ {
session_context = shared_from_this(); session_context = shared_from_this();
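The workload resolution that makeQueryContextForMerge/makeQueryContextForMutate implement, isolated (simplified signatures): the per-table MergeTree setting wins when non-empty, otherwise the server-wide default applies.

```cpp
#include <cstdio>
#include <string>

// Per-table setting overrides the server-wide default, mirroring
// the fallback expression in makeQueryContextForMerge()/ForMutate().
std::string resolveWorkload(const std::string & table_setting, const std::string & server_default)
{
    return table_setting.empty() ? server_default : table_setting;
}

int main()
{
    std::printf("%s\n", resolveWorkload("", "default").c_str());            // default
    std::printf("%s\n", resolveWorkload("big_merges", "default").c_str());  // big_merges
}
```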

View File

@ -622,6 +622,10 @@ public:
/// Resource management related /// Resource management related
ResourceManagerPtr getResourceManager() const; ResourceManagerPtr getResourceManager() const;
ClassifierPtr getWorkloadClassifier() const; ClassifierPtr getWorkloadClassifier() const;
String getMergeWorkload() const;
void setMergeWorkload(const String & value);
String getMutationWorkload() const;
void setMutationWorkload(const String & value);
/// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once. /// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once.
void setExternalTablesInitializer(ExternalTablesInitializer && initializer); void setExternalTablesInitializer(ExternalTablesInitializer && initializer);
@ -907,6 +911,8 @@ public:
void setSessionContext(ContextMutablePtr context_) { session_context = context_; } void setSessionContext(ContextMutablePtr context_) { session_context = context_; }
void makeQueryContext(); void makeQueryContext();
void makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings);
void makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings);
void makeSessionContext(); void makeSessionContext();
void makeGlobalContext(); void makeGlobalContext();

View File

@ -615,9 +615,9 @@ static bool decimalEqualsFloat(Field field, Float64 float_value)
return decimal_to_float == float_value; return decimal_to_float == float_value;
} }
std::optional<Field> convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) std::optional<Field> convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type)
{ {
Field result_value = convertFieldToType(from_value, to_type); Field result_value = convertFieldToType(from_value, to_type, &from_type);
if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType()))
{ {

View File

@ -22,6 +22,6 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t
/// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. /// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal.
/// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. /// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt.
std::optional<Field> convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); std::optional<Field> convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type);
} }

View File

@ -2179,7 +2179,7 @@ public:
bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override
{ {
/// kql(table|project ...) /// kql('table|project ...')
/// 0. Parse the kql query /// 0. Parse the kql query
/// 1. Parse closing token /// 1. Parse closing token
if (state == 0) if (state == 0)

View File

@ -853,7 +853,7 @@ Please note that the functions listed below only take constant parameters for no
## KQL() function ## KQL() function
- create table - create table
`CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql($$Customers | project LastName,Age$$);`
verify the content of `kql_table4` verify the content of `kql_table4`
`select * from kql_table` `select * from kql_table`
@ -867,12 +867,12 @@ Please note that the functions listed below only take constant parameters for no
Age Nullable(UInt8) Age Nullable(UInt8)
) ENGINE = Memory; ) ENGINE = Memory;
``` ```
`INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` `INSERT INTO temp select * from kql($$Customers|project FirstName,LastName,Age$$);`
verify the content of `temp` verify the content of `temp`
`select * from temp` `select * from temp`
- Select from kql() - Select from kql(...)
`Select * from kql(Customers|project FirstName)` `Select * from kql($$Customers|project FirstName$$)`
## KQL operators: ## KQL operators:
- Tabular expression statements - Tabular expression statements
@ -993,4 +993,3 @@ Please note that the functions listed below only take constant parameters for no
- dcount() - dcount()
- dcountif() - dcountif()
- bin - bin

View File

@ -301,8 +301,8 @@ String IParserKQLFunction::kqlCallToExpression(
}); });
const auto kql_call = std::format("{}({})", function_name, params_str); const auto kql_call = std::format("{}({})", function_name, params_str);
DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); Tokens call_tokens(kql_call.data(), kql_call.data() + kql_call.length(), 0, true);
DB::IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks);
return DB::IParserKQLFunction::getExpression(tokens_pos); return DB::IParserKQLFunction::getExpression(tokens_pos);
} }
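The same mechanical change repeats through the KQL parser files below: `c_str()` becomes `data()`, and the `Tokens` constructor gains two arguments. Judging by the calls here and the `skip_insignificant` member visible in the `Tokens` class hunk further down, the signature is presumably `Tokens(begin, end, max_query_size, skip_insignificant)`; a representative call:

```
// Pattern used throughout the KQL parsers in this commit. The argument meanings
// are assumptions: max_query_size = 0 means "no limit", and skip_insignificant
// drops whitespace/comment tokens while lexing.
String expr = getExprFromToken(pos);
Tokens tokens(expr.data(), expr.data() + expr.size(), /*max_query_size=*/ 0, /*skip_insignificant=*/ true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
```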

View File

@ -11,7 +11,7 @@ bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
expr = getExprFromToken(pos); expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected))

View File

@ -22,7 +22,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
String except_str; String except_str;
String new_extend_str; String new_extend_str;
Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size()); Tokens ntokens(extend_expr.data(), extend_expr.data() + extend_expr.size(), 0, true);
IParser::Pos npos(ntokens, pos.max_depth, pos.max_backtracks); IParser::Pos npos(ntokens, pos.max_depth, pos.max_backtracks);
String alias; String alias;
@ -76,7 +76,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
apply_alias(); apply_alias();
String expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str); String expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
if (!ParserSelectQuery().parse(new_pos, select_query, expected)) if (!ParserSelectQuery().parse(new_pos, select_query, expected))

View File

@ -13,7 +13,7 @@ bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
String expr = getExprFromToken(pos); String expr = getExprFromToken(pos);
ASTPtr where_expression; ASTPtr where_expression;
Tokens token_filter(expr.c_str(), expr.c_str() + expr.size()); Tokens token_filter(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos pos_filter(token_filter, pos.max_depth, pos.max_backtracks); IParser::Pos pos_filter(token_filter, pos.max_depth, pos.max_backtracks);
if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected))
return false; return false;

View File

@ -13,7 +13,7 @@ bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
auto expr = getExprFromToken(pos); auto expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected))

View File

@ -298,7 +298,7 @@ bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false; return false;
const String setting_str = "enable_unaligned_array_join = 1"; const String setting_str = "enable_unaligned_array_join = 1";
Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); Tokens token_settings(setting_str.data(), setting_str.data() + setting_str.size(), 0, true);
IParser::Pos pos_settings(token_settings, pos.max_depth, pos.max_backtracks); IParser::Pos pos_settings(token_settings, pos.max_depth, pos.max_backtracks);
if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) if (!ParserSetQuery(true).parse(pos_settings, setting, expected))

View File

@ -173,7 +173,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
auto date_type_cast = [&](String & src) auto date_type_cast = [&](String & src)
{ {
Tokens tokens(src.c_str(), src.c_str() + src.size()); Tokens tokens(src.data(), src.data() + src.size(), 0, true);
IParser::Pos pos(tokens, max_depth, max_backtracks); IParser::Pos pos(tokens, max_depth, max_backtracks);
String res; String res;
while (isValidKQLPos(pos)) while (isValidKQLPos(pos))
@ -200,7 +200,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
auto get_group_expression_alias = [&] auto get_group_expression_alias = [&]
{ {
std::vector<String> group_expression_tokens; std::vector<String> group_expression_tokens;
Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); Tokens tokens(group_expression.data(), group_expression.data() + group_expression.size(), 0, true);
IParser::Pos pos(tokens, max_depth, max_backtracks); IParser::Pos pos(tokens, max_depth, max_backtracks);
while (isValidKQLPos(pos)) while (isValidKQLPos(pos))
{ {
@ -413,7 +413,7 @@ bool ParserKQLMakeSeries ::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
makeSeries(kql_make_series, node, pos.max_depth, pos.max_backtracks); makeSeries(kql_make_series, node, pos.max_depth, pos.max_backtracks);
Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); Tokens token_main_query(kql_make_series.main_query.data(), kql_make_series.main_query.data() + kql_make_series.main_query.size(), 0, true);
IParser::Pos pos_main_query(token_main_query, pos.max_depth, pos.max_backtracks); IParser::Pos pos_main_query(token_main_query, pos.max_depth, pos.max_backtracks);
if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected))

View File

@ -1,20 +1,26 @@
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/CommonParsers.h> #include <Parsers/CommonParsers.h>
#include <Parsers/Kusto/KustoFunctions/IParserKQLFunction.h> #include <Parsers/Kusto/KustoFunctions/IParserKQLFunction.h>
#include <Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h>
#include <Parsers/Kusto/ParserKQLOperators.h> #include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h> #include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/Utilities.h> #include <Parsers/Kusto/Utilities.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Parsers/formatAST.h> #include <Parsers/formatAST.h>
#include "KustoFunctions/IParserKQLFunction.h"
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
}
namespace namespace
{ {
enum class KQLOperatorValue : uint16_t enum class KQLOperatorValue
{ {
none, none,
between, between,
@ -56,7 +62,8 @@ enum class KQLOperatorValue : uint16_t
not_startswith_cs, not_startswith_cs,
}; };
const std::unordered_map<String, KQLOperatorValue> KQLOperator = { const std::unordered_map<String, KQLOperatorValue> KQLOperator =
{
{"between", KQLOperatorValue::between}, {"between", KQLOperatorValue::between},
{"!between", KQLOperatorValue::not_between}, {"!between", KQLOperatorValue::not_between},
{"contains", KQLOperatorValue::contains}, {"contains", KQLOperatorValue::contains},
@ -96,44 +103,37 @@ const std::unordered_map<String, KQLOperatorValue> KQLOperator = {
{"!startswith_cs", KQLOperatorValue::not_startswith_cs}, {"!startswith_cs", KQLOperatorValue::not_startswith_cs},
}; };
void rebuildSubqueryForInOperator(DB::ASTPtr & node, bool useLowerCase) void rebuildSubqueryForInOperator(ASTPtr & node, bool useLowerCase)
{ {
//A sub-query for the `in` operator in KQL can have multiple columns, but only the first column is taken. //A sub-query for the `in` operator in KQL can have multiple columns, but only the first column is taken.
//A sub-query for the `in` operator in ClickHouse cannot have multiple columns, //A sub-query for the `in` operator in ClickHouse cannot have multiple columns,
//so only the first column is taken if there are multiple. //so only the first column is taken if there are multiple.
//`select *` does not work for the subquery (a tabular statement without a project). //`select *` does not work for the subquery (a tabular statement without a project).
const auto selectColumns = node->children[0]->children[0]->as<DB::ASTSelectQuery>()->select(); const auto selectColumns = node->children[0]->children[0]->as<ASTSelectQuery>()->select();
while (selectColumns->children.size() > 1) while (selectColumns->children.size() > 1)
selectColumns->children.pop_back(); selectColumns->children.pop_back();
if (useLowerCase) if (useLowerCase)
{ {
auto args = std::make_shared<DB::ASTExpressionList>(); auto args = std::make_shared<ASTExpressionList>();
args->children.push_back(selectColumns->children[0]); args->children.push_back(selectColumns->children[0]);
auto func_lower = std::make_shared<DB::ASTFunction>(); auto func_lower = std::make_shared<ASTFunction>();
func_lower->name = "lower"; func_lower->name = "lower";
func_lower->children.push_back(selectColumns->children[0]); func_lower->children.push_back(selectColumns->children[0]);
func_lower->arguments = args; func_lower->arguments = args;
if (selectColumns->children[0]->as<DB::ASTIdentifier>()) if (selectColumns->children[0]->as<ASTIdentifier>())
func_lower->alias = std::move(selectColumns->children[0]->as<DB::ASTIdentifier>()->alias); func_lower->alias = std::move(selectColumns->children[0]->as<ASTIdentifier>()->alias);
else if (selectColumns->children[0]->as<DB::ASTFunction>()) else if (selectColumns->children[0]->as<ASTFunction>())
func_lower->alias = std::move(selectColumns->children[0]->as<DB::ASTFunction>()->alias); func_lower->alias = std::move(selectColumns->children[0]->as<ASTFunction>()->alias);
auto funcs = std::make_shared<DB::ASTExpressionList>(); auto funcs = std::make_shared<ASTExpressionList>();
funcs->children.push_back(func_lower); funcs->children.push_back(func_lower);
selectColumns->children[0] = std::move(funcs); selectColumns->children[0] = std::move(funcs);
} }
} }
} }
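The helpers in this file (see `genEqOpExprCis` and `genInOpExprCis` below) implement KQL's case-insensitive operators, presumably the `~`-suffixed forms such as `=~` and `in~`, by wrapping both operands in `lower()`. A standalone sketch of the string-building idea, simplified from the token-stream walk the real code performs:

```
#include <string>
#include <vector>

// Simplified model of genInOpExprCis: lower() both the left-hand side and every
// string literal so that 'Peter' and 'peter' compare equal.
std::string lowerWrapInList(const std::string & lhs, const std::vector<std::string> & literals)
{
    std::string expr = "lower(" + lhs + ") in (";
    for (size_t i = 0; i < literals.size(); ++i)
    {
        if (i != 0)
            expr += ", ";
        expr += "lower('" + literals[i] + "')";
    }
    return expr + ")";
}

// lowerWrapInList("FirstName", {"peter", "apple"})
//   == "lower(FirstName) in (lower('peter'), lower('apple'))"
```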
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
}
String KQLOperators::genHasAnyAllOpExpr(std::vector<String> & tokens, IParser::Pos & token_pos, String kql_op, String ch_op) String KQLOperators::genHasAnyAllOpExpr(std::vector<String> & tokens, IParser::Pos & token_pos, String kql_op, String ch_op)
{ {
@ -166,7 +166,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector<String> & tokens, IParser::P
return new_expr; return new_expr;
} }
String genEqOpExprCis(std::vector<String> & tokens, DB::IParser::Pos & token_pos, const String & ch_op) String genEqOpExprCis(std::vector<String> & tokens, IParser::Pos & token_pos, const String & ch_op)
{ {
String tmp_arg(token_pos->begin, token_pos->end); String tmp_arg(token_pos->begin, token_pos->end);
@ -178,30 +178,30 @@ String genEqOpExprCis(std::vector<String> & tokens, DB::IParser::Pos & token_pos
new_expr += ch_op + " "; new_expr += ch_op + " ";
++token_pos; ++token_pos;
if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) if (token_pos->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)
new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; new_expr += "lower('" + IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')";
else else
new_expr += "lower(" + DB::IParserKQLFunction::getExpression(token_pos) + ")"; new_expr += "lower(" + IParserKQLFunction::getExpression(token_pos) + ")";
tokens.pop_back(); tokens.pop_back();
return new_expr; return new_expr;
} }
String genInOpExprCis(std::vector<String> & tokens, DB::IParser::Pos & token_pos, const String & kql_op, const String & ch_op) String genInOpExprCis(std::vector<String> & tokens, IParser::Pos & token_pos, const String & kql_op, const String & ch_op)
{ {
DB::ParserKQLTableFunction kqlfun_p; ParserKQLTableFunction kqlfun_p;
DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); ParserToken s_lparen(TokenType::OpeningRoundBracket);
DB::ASTPtr select; ASTPtr select;
DB::Expected expected; Expected expected;
String new_expr; String new_expr;
++token_pos; ++token_pos;
if (!s_lparen.ignore(token_pos, expected)) if (!s_lparen.ignore(token_pos, expected))
throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op);
if (tokens.empty()) if (tokens.empty())
throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op);
new_expr = "lower(" + tokens.back() + ") "; new_expr = "lower(" + tokens.back() + ") ";
tokens.pop_back(); tokens.pop_back();
@ -218,39 +218,39 @@ String genInOpExprCis(std::vector<String> & tokens, DB::IParser::Pos & token_pos
--token_pos; --token_pos;
new_expr += ch_op; new_expr += ch_op;
while (isValidKQLPos(token_pos) && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) while (isValidKQLPos(token_pos) && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon)
{ {
auto tmp_arg = String(token_pos->begin, token_pos->end); auto tmp_arg = String(token_pos->begin, token_pos->end);
if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket if (token_pos->type != TokenType::Comma && token_pos->type != TokenType::ClosingRoundBracket
&& token_pos->type != DB::TokenType::OpeningRoundBracket && token_pos->type != DB::TokenType::OpeningSquareBracket && token_pos->type != TokenType::OpeningRoundBracket && token_pos->type != TokenType::OpeningSquareBracket
&& token_pos->type != DB::TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") && token_pos->type != TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic")
{ {
if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) if (token_pos->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)
new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; new_expr += "lower('" + IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')";
else else
new_expr += "lower(" + tmp_arg + ")"; new_expr += "lower(" + tmp_arg + ")";
} }
else if (tmp_arg != "~" && tmp_arg != "dynamic" && tmp_arg != "[" && tmp_arg != "]") else if (tmp_arg != "~" && tmp_arg != "dynamic" && tmp_arg != "[" && tmp_arg != "]")
new_expr += tmp_arg; new_expr += tmp_arg;
if (token_pos->type == DB::TokenType::ClosingRoundBracket) if (token_pos->type == TokenType::ClosingRoundBracket)
break; break;
++token_pos; ++token_pos;
} }
return new_expr; return new_expr;
} }
std::string genInOpExpr(DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) std::string genInOpExpr(IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op)
{ {
DB::ParserKQLTableFunction kqlfun_p; ParserKQLTableFunction kqlfun_p;
DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); ParserToken s_lparen(TokenType::OpeningRoundBracket);
DB::ASTPtr select; ASTPtr select;
DB::Expected expected; Expected expected;
++token_pos; ++token_pos;
if (!s_lparen.ignore(token_pos, expected)) if (!s_lparen.ignore(token_pos, expected))
throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op);
auto pos = token_pos; auto pos = token_pos;
if (kqlfun_p.parse(pos, select, expected)) if (kqlfun_p.parse(pos, select, expected))

View File

@ -9,7 +9,7 @@ bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ASTPtr select_expression_list; ASTPtr select_expression_list;
const String expr = getExprFromToken(pos); const String expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected))

View File

@ -11,7 +11,7 @@ bool ParserKQLProject ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
expr = getExprFromToken(pos); expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected))

View File

@ -37,7 +37,7 @@ bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth
{ {
Expected expected; Expected expected;
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos pos(tokens, max_depth, max_backtracks); IParser::Pos pos(tokens, max_depth, max_backtracks);
return parse(pos, node, expected); return parse(pos, node, expected);
} }
@ -45,7 +45,7 @@ bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth
bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks)
{ {
Expected expected; Expected expected;
Tokens token_subquery(query.c_str(), query.c_str() + query.size()); Tokens token_subquery(query.data(), query.data() + query.size(), 0, true);
IParser::Pos pos_subquery(token_subquery, max_depth, max_backtracks); IParser::Pos pos_subquery(token_subquery, max_depth, max_backtracks);
if (!parser->parse(pos_subquery, select_node, expected)) if (!parser->parse(pos_subquery, select_node, expected))
return false; return false;
@ -123,7 +123,7 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo
String ParserKQLBase::getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks) String ParserKQLBase::getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks)
{ {
Tokens tokens(text.c_str(), text.c_str() + text.size()); Tokens tokens(text.data(), text.data() + text.size(), 0, true);
IParser::Pos pos(tokens, max_depth, max_backtracks); IParser::Pos pos(tokens, max_depth, max_backtracks);
return getExprFromToken(pos); return getExprFromToken(pos);
@ -522,7 +522,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
--last_pos; --last_pos;
String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end));
Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); Tokens token_subquery(sub_query.data(), sub_query.data() + sub_query.size(), 0, true);
IParser::Pos pos_subquery(token_subquery, pos.max_depth, pos.max_backtracks); IParser::Pos pos_subquery(token_subquery, pos.max_depth, pos.max_backtracks);
if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) if (!ParserKQLSubquery().parse(pos_subquery, tables, expected))
@ -543,7 +543,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
auto oprator = getOperator(op_str); auto oprator = getOperator(op_str);
if (oprator) if (oprator)
{ {
Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); Tokens token_clause(op_calsue.data(), op_calsue.data() + op_calsue.size(), 0, true);
IParser::Pos pos_clause(token_clause, pos.max_depth, pos.max_backtracks); IParser::Pos pos_clause(token_clause, pos.max_depth, pos.max_backtracks);
if (!oprator->parse(pos_clause, node, expected)) if (!oprator->parse(pos_clause, node, expected))
return false; return false;
@ -576,7 +576,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!node->as<ASTSelectQuery>()->select()) if (!node->as<ASTSelectQuery>()->select())
{ {
auto expr = String("*"); auto expr = String("*");
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
if (!std::make_unique<ParserKQLProject>()->parse(new_pos, node, expected)) if (!std::make_unique<ParserKQLProject>()->parse(new_pos, node, expected))
return false; return false;

View File

@ -18,7 +18,7 @@ bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
auto expr = getExprFromToken(pos); auto expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true);
IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
auto pos_backup = new_pos; auto pos_backup = new_pos;

View File

@ -2,13 +2,13 @@
#include <Parsers/ASTSelectWithUnionQuery.h> #include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/CommonParsers.h> #include <Parsers/CommonParsers.h>
#include <Parsers/IParserBase.h> #include <Parsers/IParserBase.h>
#include <Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h>
#include <Parsers/Kusto/ParserKQLQuery.h> #include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h> #include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/Utilities.h> #include <Parsers/Kusto/Utilities.h>
#include <Parsers/ParserSetQuery.h> #include <Parsers/ParserSetQuery.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
namespace DB namespace DB
{ {
@ -63,6 +63,8 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{ {
/// TODO: This code is idiotic, see https://github.com/ClickHouse/ClickHouse/issues/61742
ParserToken lparen(TokenType::OpeningRoundBracket); ParserToken lparen(TokenType::OpeningRoundBracket);
ASTPtr string_literal; ASTPtr string_literal;
@ -101,13 +103,16 @@ bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
++pos; ++pos;
} }
Tokens token_kql(kql_statement.data(), kql_statement.data() + kql_statement.size()); Tokens tokens_kql(kql_statement.data(), kql_statement.data() + kql_statement.size(), 0, true);
IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); IParser::Pos pos_kql(tokens_kql, pos.max_depth, pos.max_backtracks);
Expected kql_expected; Expected kql_expected;
kql_expected.enable_highlighting = false; kql_expected.enable_highlighting = false;
if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected)) if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected))
return false; return false;
++pos; ++pos;
return true; return true;
} }
} }

View File

@ -45,7 +45,7 @@ protected:
class ParserKQLTableFunction : public IParserBase class ParserKQLTableFunction : public IParserBase
{ {
protected: protected:
const char * getName() const override { return "KQL() function"; } const char * getName() const override { return "KQL function"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
}; };

View File

@ -194,7 +194,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
String converted_columns = getExprFromToken(expr_columns, pos.max_depth, pos.max_backtracks); String converted_columns = getExprFromToken(expr_columns, pos.max_depth, pos.max_backtracks);
Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); Tokens token_converted_columns(converted_columns.data(), converted_columns.data() + converted_columns.size(), 0, true);
IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth, pos.max_backtracks); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth, pos.max_backtracks);
if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected))
@ -206,7 +206,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
{ {
String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth, pos.max_backtracks); String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth, pos.max_backtracks);
Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); Tokens token_converted_groupby(converted_groupby.data(), converted_groupby.data() + converted_groupby.size(), 0, true);
IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth, pos.max_backtracks); IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth, pos.max_backtracks);
if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected))

View File

@ -21,6 +21,7 @@ class Tokens
{ {
private: private:
std::vector<Token> data; std::vector<Token> data;
size_t max_pos = 0;
Lexer lexer; Lexer lexer;
bool skip_insignificant; bool skip_insignificant;
@ -35,10 +36,16 @@ public:
while (true) while (true)
{ {
if (index < data.size()) if (index < data.size())
{
max_pos = std::max(max_pos, index);
return data[index]; return data[index];
}
if (!data.empty() && data.back().isEnd()) if (!data.empty() && data.back().isEnd())
{
max_pos = data.size() - 1;
return data.back(); return data.back();
}
Token token = lexer.nextToken(); Token token = lexer.nextToken();
@ -51,7 +58,12 @@ public:
{ {
if (data.empty()) if (data.empty())
return (*this)[0]; return (*this)[0];
return data.back(); return data[max_pos];
}
void reset()
{
max_pos = 0;
} }
}; };
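The intent of this change: `Tokens` now remembers the furthest position any parser actually looked at, so `max()` reports where parsing really stopped, and `reset()` lets a speculative lookahead (such as the one added to `tryParseQuery` below) discard its footprint. A minimal standalone analogue of the bookkeeping:

```
#include <algorithm>
#include <cstddef>

// Toy analogue of the max_pos bookkeeping added above: record the furthest index
// requested, expose it, and allow clearing it after a speculative scan.
struct MaxPosTracker
{
    size_t max_pos = 0;

    void touch(size_t index) { max_pos = std::max(max_pos, index); } // called from operator[]
    size_t max() const { return max_pos; }                           // furthest position seen
    void reset() { max_pos = 0; }                                    // forget lookahead progress
};
```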

View File

@ -4,6 +4,7 @@
#include <Parsers/ParserQuery.h> #include <Parsers/ParserQuery.h>
#include <Parsers/ASTInsertQuery.h> #include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTExplainQuery.h> #include <Parsers/ASTExplainQuery.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/Lexer.h> #include <Parsers/Lexer.h>
#include <Parsers/TokenIterator.h> #include <Parsers/TokenIterator.h>
#include <Common/StringUtils.h> #include <Common/StringUtils.h>
@ -285,6 +286,33 @@ ASTPtr tryParseQuery(
} }
Expected expected; Expected expected;
/** A shortcut - if Lexer found invalid tokens, fail early without full parsing.
* But there are certain cases when invalid tokens are permitted:
* 1. INSERT queries can have arbitrary data after the FORMAT clause, that is parsed by a different parser.
* 2. It can also be the case when there are multiple queries separated by semicolons, and the first queries are ok
* while subsequent queries have syntax errors.
*
* This shortcut is needed to avoid complex backtracking in case of obviously erroneous queries.
*/
IParser::Pos lookahead(token_iterator);
if (!ParserKeyword(Keyword::INSERT_INTO).ignore(lookahead))
{
while (lookahead->type != TokenType::Semicolon && lookahead->type != TokenType::EndOfStream)
{
if (lookahead->isError())
{
out_error_message = getLexicalErrorMessage(query_begin, all_queries_end, *lookahead, hilite, query_description);
return nullptr;
}
++lookahead;
}
/// We should not spoil the info about maximum parsed position in the original iterator.
tokens.reset();
}
ASTPtr res; ASTPtr res;
const bool parse_res = parser.parse(token_iterator, res, expected); const bool parse_res = parser.parse(token_iterator, res, expected);
const auto last_token = token_iterator.max(); const auto last_token = token_iterator.max();
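A few illustrative inputs for this early-exit path (assumed examples, not taken from the commit):

```
// Assumed behaviour of the lookahead above:
//
//   SELECT 'abc              -- unterminated string literal: the lexer emits an error
//                               token, so we fail fast without running the full parser.
//   SELECT 1; SELECT 'abc    -- the scan stops at the first semicolon, so the broken
//                               second statement is diagnosed later, not here.
//   INSERT INTO t FORMAT CSV -- exempt: bytes after FORMAT are handled by another parser.
```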

View File

@ -116,6 +116,12 @@ struct GridSymbols
const char * dash = ""; const char * dash = "";
const char * bold_bar = ""; const char * bold_bar = "";
const char * bar = ""; const char * bar = "";
const char * bold_right_separator_footer = "";
const char * bold_left_separator_footer = "";
const char * bold_middle_separator_footer = "";
const char * bold_left_bottom_corner = "";
const char * bold_right_bottom_corner = "";
const char * bold_bottom_separator = "";
}; };
GridSymbols utf8_grid_symbols; GridSymbols utf8_grid_symbols;
@ -182,47 +188,58 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
Widths name_widths; Widths name_widths;
calculateWidths(header, chunk, widths, max_widths, name_widths); calculateWidths(header, chunk, widths, max_widths, name_widths);
Before:
    const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
        utf8_grid_symbols :
        ascii_grid_symbols;
After:
    const GridSymbols & grid_symbols
        = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? utf8_grid_symbols : ascii_grid_symbols;
/// Create separators /// Create separators
WriteBufferFromOwnString top_separator; WriteBufferFromOwnString top_separator;
WriteBufferFromOwnString middle_names_separator; WriteBufferFromOwnString middle_names_separator;
WriteBufferFromOwnString middle_values_separator; WriteBufferFromOwnString middle_values_separator;
WriteBufferFromOwnString bottom_separator; WriteBufferFromOwnString bottom_separator;
WriteBufferFromOwnString footer_top_separator;
WriteBufferFromOwnString footer_bottom_separator;
top_separator << grid_symbols.bold_left_top_corner; top_separator << grid_symbols.bold_left_top_corner;
middle_names_separator << grid_symbols.bold_left_separator; middle_names_separator << grid_symbols.bold_left_separator;
middle_values_separator << grid_symbols.left_separator; middle_values_separator << grid_symbols.left_separator;
bottom_separator << grid_symbols.left_bottom_corner; bottom_separator << grid_symbols.left_bottom_corner;
footer_top_separator << grid_symbols.bold_left_separator_footer;
footer_bottom_separator << grid_symbols.bold_left_bottom_corner;
for (size_t i = 0; i < num_columns; ++i) for (size_t i = 0; i < num_columns; ++i)
{ {
if (i != 0) if (i != 0)
{ {
top_separator << grid_symbols.bold_top_separator; top_separator << grid_symbols.bold_top_separator;
middle_names_separator << grid_symbols.bold_middle_separator; middle_names_separator << grid_symbols.bold_middle_separator;
middle_values_separator << grid_symbols.middle_separator; middle_values_separator << grid_symbols.middle_separator;
bottom_separator << grid_symbols.bottom_separator; bottom_separator << grid_symbols.bottom_separator;
footer_top_separator << grid_symbols.bold_middle_separator_footer;
footer_bottom_separator << grid_symbols.bold_bottom_separator;
} }
for (size_t j = 0; j < max_widths[i] + 2; ++j) for (size_t j = 0; j < max_widths[i] + 2; ++j)
{ {
top_separator << grid_symbols.bold_dash; top_separator << grid_symbols.bold_dash;
middle_names_separator << grid_symbols.bold_dash; middle_names_separator << grid_symbols.bold_dash;
middle_values_separator << grid_symbols.dash; middle_values_separator << grid_symbols.dash;
bottom_separator << grid_symbols.dash; bottom_separator << grid_symbols.dash;
footer_top_separator << grid_symbols.bold_dash;
footer_bottom_separator << grid_symbols.bold_dash;
} }
} }
top_separator << grid_symbols.bold_right_top_corner << "\n"; top_separator << grid_symbols.bold_right_top_corner << "\n";
middle_names_separator << grid_symbols.bold_right_separator << "\n"; middle_names_separator << grid_symbols.bold_right_separator << "\n";
middle_values_separator << grid_symbols.right_separator << "\n"; middle_values_separator << grid_symbols.right_separator << "\n";
bottom_separator << grid_symbols.right_bottom_corner << "\n"; bottom_separator << grid_symbols.right_bottom_corner << "\n";
footer_top_separator << grid_symbols.bold_right_separator_footer << "\n";
footer_bottom_separator << grid_symbols.bold_right_bottom_corner << "\n";
std::string top_separator_s = top_separator.str(); std::string top_separator_s = top_separator.str();
std::string middle_names_separator_s = middle_names_separator.str(); std::string middle_names_separator_s = middle_names_separator.str();
std::string middle_values_separator_s = middle_values_separator.str(); std::string middle_values_separator_s = middle_values_separator.str();
std::string bottom_separator_s = bottom_separator.str(); std::string bottom_separator_s = bottom_separator.str();
std::string footer_top_separator_s = footer_top_separator.str();
std::string footer_bottom_separator_s = footer_bottom_separator.str();
if (format_settings.pretty.output_format_pretty_row_numbers) if (format_settings.pretty.output_format_pretty_row_numbers)
{ {
@ -239,43 +256,47 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
} }
Before:
    /// Names
    writeCString(grid_symbols.bold_bar, out);
    writeCString(" ", out);
    for (size_t i = 0; i < num_columns; ++i)
    {
        if (i != 0)
        {
            writeCString(" ", out);
            writeCString(grid_symbols.bold_bar, out);
            writeCString(" ", out);
        }

        const auto & col = header.getByPosition(i);

        if (color)
            writeCString("\033[1m", out);

        if (col.type->shouldAlignRightInPrettyFormats())
        {
            for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k)
                writeChar(' ', out);

            writeString(col.name, out);
        }
        else
        {
            writeString(col.name, out);

            for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k)
                writeChar(' ', out);
        }

        if (color)
            writeCString("\033[0m", out);
    }
    writeCString(" ", out);
    writeCString(grid_symbols.bold_bar, out);
    writeCString("\n", out);
After:
    /// Names
    auto write_names = [&]() -> void
    {
        writeCString(grid_symbols.bold_bar, out);
        writeCString(" ", out);
        for (size_t i = 0; i < num_columns; ++i)
        {
            if (i != 0)
            {
                writeCString(" ", out);
                writeCString(grid_symbols.bold_bar, out);
                writeCString(" ", out);
            }

            const auto & col = header.getByPosition(i);

            if (color)
                writeCString("\033[1m", out);

            if (col.type->shouldAlignRightInPrettyFormats())
            {
                for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k)
                    writeChar(' ', out);

                writeString(col.name, out);
            }
            else
            {
                writeString(col.name, out);

                for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k)
                    writeChar(' ', out);
            }

            if (color)
                writeCString("\033[0m", out);
        }
        writeCString(" ", out);
        writeCString(grid_symbols.bold_bar, out);
        writeCString("\n", out);
    };
    write_names();
if (format_settings.pretty.output_format_pretty_row_numbers) if (format_settings.pretty.output_format_pretty_row_numbers)
{ {
@ -317,9 +338,15 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
if (j != 0) if (j != 0)
writeCString(grid_symbols.bar, out); writeCString(grid_symbols.bar, out);
const auto & type = *header.getByPosition(j).type; const auto & type = *header.getByPosition(j).type;
Before:
    writeValueWithPadding(*columns[j], *serializations[j], i,
        widths[j].empty() ? max_widths[j] : widths[j][i],
        max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
After:
    writeValueWithPadding(
        *columns[j],
        *serializations[j],
        i,
        widths[j].empty() ? max_widths[j] : widths[j][i],
        max_widths[j],
        cut_to_width,
        type.shouldAlignRightInPrettyFormats(),
        isNumber(type));
} }
writeCString(grid_symbols.bar, out); writeCString(grid_symbols.bar, out);
@ -332,8 +359,33 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
/// Write left blank /// Write left blank
writeString(String(row_number_width, ' '), out); writeString(String(row_number_width, ' '), out);
} }
writeString(bottom_separator_s, out);
/// output column names in the footer
if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names)
{
writeString(footer_top_separator_s, out);
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
/// output header names
write_names();
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
writeString(footer_bottom_separator_s, out);
}
else
{
writeString(bottom_separator_s, out);
}
total_rows += num_rows; total_rows += num_rows;
} }
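All three Pretty variants touched by this commit gate the footer on the same pair of settings; the condition above recurs verbatim in PrettyCompact and PrettySpace below. Factored out for readability (a sketch, not code from the diff):

```
// The gating check shared by Pretty, PrettyCompact and PrettySpace:
// repeat the column names after the data once the chunk is large enough.
static bool shouldWriteFooter(const FormatSettings & format_settings, size_t num_rows)
{
    return format_settings.pretty.output_format_pretty_display_footer_column_names
        && num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows;
}
```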

View File

@ -57,7 +57,8 @@ PrettyCompactBlockOutputFormat::PrettyCompactBlockOutputFormat(WriteBuffer & out
void PrettyCompactBlockOutputFormat::writeHeader( void PrettyCompactBlockOutputFormat::writeHeader(
const Block & block, const Block & block,
const Widths & max_widths, const Widths & max_widths,
const Widths & name_widths) const Widths & name_widths,
const bool write_footer)
{ {
if (format_settings.pretty.output_format_pretty_row_numbers) if (format_settings.pretty.output_format_pretty_row_numbers)
{ {
@ -70,14 +71,20 @@ void PrettyCompactBlockOutputFormat::writeHeader(
ascii_grid_symbols; ascii_grid_symbols;
/// Names /// Names
Before:
    writeCString(grid_symbols.left_top_corner, out);
After:
    if (write_footer)
        writeCString(grid_symbols.left_bottom_corner, out);
    else
        writeCString(grid_symbols.left_top_corner, out);
writeCString(grid_symbols.dash, out); writeCString(grid_symbols.dash, out);
for (size_t i = 0; i < max_widths.size(); ++i) for (size_t i = 0; i < max_widths.size(); ++i)
{ {
if (i != 0) if (i != 0)
{ {
writeCString(grid_symbols.dash, out); writeCString(grid_symbols.dash, out);
Before:
    writeCString(grid_symbols.top_separator, out);
After:
    if (write_footer)
        writeCString(grid_symbols.bottom_separator, out);
    else
        writeCString(grid_symbols.top_separator, out);
writeCString(grid_symbols.dash, out); writeCString(grid_symbols.dash, out);
} }
@ -107,7 +114,10 @@ void PrettyCompactBlockOutputFormat::writeHeader(
} }
} }
writeCString(grid_symbols.dash, out); writeCString(grid_symbols.dash, out);
Before:
    writeCString(grid_symbols.right_top_corner, out);
After:
    if (write_footer)
        writeCString(grid_symbols.right_bottom_corner, out);
    else
        writeCString(grid_symbols.right_top_corner, out);
writeCString("\n", out); writeCString("\n", out);
} }
@ -195,13 +205,19 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po
Widths name_widths; Widths name_widths;
calculateWidths(header, chunk, widths, max_widths, name_widths); calculateWidths(header, chunk, widths, max_widths, name_widths);
writeHeader(header, max_widths, name_widths); writeHeader(header, max_widths, name_widths, false);
for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i) for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i)
writeRow(i, header, chunk, widths, max_widths); writeRow(i, header, chunk, widths, max_widths);
Before:
    writeBottom(max_widths);
After:
    if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names)
    {
        writeHeader(header, max_widths, name_widths, true);
    }
    else
    {
        writeBottom(max_widths);
    }
total_rows += num_rows; total_rows += num_rows;
} }

View File

@ -17,7 +17,7 @@ public:
String getName() const override { return "PrettyCompactBlockOutputFormat"; } String getName() const override { return "PrettyCompactBlockOutputFormat"; }
private: private:
void writeHeader(const Block & block, const Widths & max_widths, const Widths & name_widths); void writeHeader(const Block & block, const Widths & max_widths, const Widths & name_widths, bool write_footer);
void writeBottom(const Widths & max_widths); void writeBottom(const Widths & max_widths);
void writeRow( void writeRow(
size_t row_num, size_t row_num,

View File

@ -36,39 +36,46 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
if (format_settings.pretty.output_format_pretty_row_numbers) if (format_settings.pretty.output_format_pretty_row_numbers)
writeString(String(row_number_width, ' '), out); writeString(String(row_number_width, ' '), out);
Before:
    /// Names
    for (size_t i = 0; i < num_columns; ++i)
    {
        if (i != 0)
            writeCString("   ", out);
        else
            writeChar(' ', out);

        const ColumnWithTypeAndName & col = header.getByPosition(i);

        if (col.type->shouldAlignRightInPrettyFormats())
        {
            for (ssize_t k = 0; k < std::max(0z, static_cast<ssize_t>(max_widths[i] - name_widths[i])); ++k)
                writeChar(' ', out);

            if (color)
                writeCString("\033[1m", out);
            writeString(col.name, out);
            if (color)
                writeCString("\033[0m", out);
        }
        else
        {
            if (color)
                writeCString("\033[1m", out);
            writeString(col.name, out);
            if (color)
                writeCString("\033[0m", out);

            for (ssize_t k = 0; k < std::max(0z, static_cast<ssize_t>(max_widths[i] - name_widths[i])); ++k)
                writeChar(' ', out);
        }
    }
    writeCString("\n\n", out);
After:
    /// Names
    auto write_names = [&](const bool is_footer) -> void
    {
        for (size_t i = 0; i < num_columns; ++i)
        {
            if (i != 0)
                writeCString("   ", out);
            else
                writeChar(' ', out);

            const ColumnWithTypeAndName & col = header.getByPosition(i);

            if (col.type->shouldAlignRightInPrettyFormats())
            {
                for (ssize_t k = 0; k < std::max(0z, static_cast<ssize_t>(max_widths[i] - name_widths[i])); ++k)
                    writeChar(' ', out);

                if (color)
                    writeCString("\033[1m", out);
                writeString(col.name, out);
                if (color)
                    writeCString("\033[0m", out);
            }
            else
            {
                if (color)
                    writeCString("\033[1m", out);
                writeString(col.name, out);
                if (color)
                    writeCString("\033[0m", out);

                for (ssize_t k = 0; k < std::max(0z, static_cast<ssize_t>(max_widths[i] - name_widths[i])); ++k)
                    writeChar(' ', out);
            }
        }
        if (!is_footer)
            writeCString("\n\n", out);
        else
            writeCString("\n", out);
    };
    write_names(false);
for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row) for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row)
{ {
@ -95,11 +102,19 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
writeValueWithPadding( writeValueWithPadding(
*columns[column], *serializations[column], row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type)); *columns[column], *serializations[column], row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
} }
writeReadableNumberTip(chunk); writeReadableNumberTip(chunk);
writeChar('\n', out); writeChar('\n', out);
} }
/// Write blank line between last row and footer
if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names)
writeCString("\n", out);
/// Write left blank
if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_row_numbers && format_settings.pretty.output_format_pretty_display_footer_column_names)
writeString(String(row_number_width, ' '), out);
/// Write footer
if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names)
write_names(true);
total_rows += num_rows; total_rows += num_rows;
} }

View File

@ -30,7 +30,6 @@
#include <Common/scope_guard_safe.h> #include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h> #include <Common/setThreadName.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/re2.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Processors/Formats/IOutputFormat.h> #include <Processors/Formats/IOutputFormat.h>
#include <Formats/FormatFactory.h> #include <Formats/FormatFactory.h>
@ -44,6 +43,7 @@
#include <Poco/Base64Decoder.h> #include <Poco/Base64Decoder.h>
#include <Poco/Base64Encoder.h> #include <Poco/Base64Encoder.h>
#include <Poco/Net/HTTPBasicCredentials.h> #include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPMessage.h>
#include <Poco/Net/HTTPStream.h> #include <Poco/Net/HTTPStream.h>
#include <Poco/MemoryStream.h> #include <Poco/MemoryStream.h>
#include <Poco/StreamCopier.h> #include <Poco/StreamCopier.h>
@ -53,7 +53,10 @@
#include <algorithm> #include <algorithm>
#include <chrono> #include <chrono>
#include <memory> #include <memory>
#include <optional>
#include <sstream> #include <sstream>
#include <unordered_map>
#include <utility>
#if USE_SSL #if USE_SSL
#include <Poco/Net/X509Certificate.h> #include <Poco/Net/X509Certificate.h>
@ -338,11 +341,11 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
} }
HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const std::optional<String> & content_type_override_) HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTPResponseHeaderSetup & http_response_headers_override_)
: server(server_) : server(server_)
, log(getLogger(name)) , log(getLogger(name))
, default_settings(server.context()->getSettingsRef()) , default_settings(server.context()->getSettingsRef())
, content_type_override(content_type_override_) , http_response_headers_override(http_response_headers_override_)
{ {
server_display_name = server.config().getString("display_name", getFQDNOrHostName()); server_display_name = server.config().getString("display_name", getFQDNOrHostName());
} }
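`HTTPResponseHeaderSetup` is not defined anywhere in this excerpt, but the `= std::nullopt` defaults in the header hunks below imply it is an optional map from header name to value. Under that assumption, the old single-purpose Content-Type override maps onto a one-entry setup:

```
#include <optional>
#include <string>
#include <unordered_map>

// Assumed shape, inferred from the `= std::nullopt` defaults; not shown in this diff.
using HTTPResponseHeaderSetup = std::optional<std::unordered_map<std::string, std::string>>;

// The former content_type_override becomes one entry in the override map.
HTTPResponseHeaderSetup setup = std::unordered_map<std::string, std::string>{
    {"Content-Type", "application/json"}};
```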
@ -670,8 +673,7 @@ void HTTPHandler::processQuery(
{ {
auto tmp_data = std::make_shared<TemporaryDataOnDisk>(server.context()->getTempDataOnDisk()); auto tmp_data = std::make_shared<TemporaryDataOnDisk>(server.context()->getTempDataOnDisk());
auto create_tmp_disk_buffer = [tmp_data] (const WriteBufferPtr &) -> WriteBufferPtr auto create_tmp_disk_buffer = [tmp_data] (const WriteBufferPtr &) -> WriteBufferPtr {
{
return tmp_data->createRawStream(); return tmp_data->createRawStream();
}; };
@ -893,13 +895,14 @@ void HTTPHandler::processQuery(
customizeContext(request, context, *in_post_maybe_compressed); customizeContext(request, context, *in_post_maybe_compressed);
in = has_external_data ? std::move(in_param) : std::make_unique<ConcatReadBuffer>(*in_param, *in_post_maybe_compressed); in = has_external_data ? std::move(in_param) : std::make_unique<ConcatReadBuffer>(*in_param, *in_post_maybe_compressed);
applyHTTPResponseHeaders(response, http_response_headers_override);
auto set_query_result = [&response, this] (const QueryResultDetails & details) auto set_query_result = [&response, this] (const QueryResultDetails & details)
{ {
response.add("X-ClickHouse-Query-Id", details.query_id); response.add("X-ClickHouse-Query-Id", details.query_id);
Before:
    if (content_type_override)
        response.setContentType(*content_type_override);
    else if (details.content_type)
        response.setContentType(*details.content_type);
After:
    if (!(http_response_headers_override && http_response_headers_override->contains(Poco::Net::HTTPMessage::CONTENT_TYPE))
        && details.content_type)
        response.setContentType(*details.content_type);
if (details.format) if (details.format)
@ -1185,8 +1188,9 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
used_output.finalize(); used_output.finalize();
} }
DynamicQueryHandler::DynamicQueryHandler(IServer & server_, const std::string & param_name_, const std::optional<String>& content_type_override_) DynamicQueryHandler::DynamicQueryHandler(
: HTTPHandler(server_, "DynamicQueryHandler", content_type_override_), param_name(param_name_) IServer & server_, const std::string & param_name_, const HTTPResponseHeaderSetup & http_response_headers_override_)
: HTTPHandler(server_, "DynamicQueryHandler", http_response_headers_override_), param_name(param_name_)
{ {
} }
@ -1247,8 +1251,8 @@ PredefinedQueryHandler::PredefinedQueryHandler(
const std::string & predefined_query_, const std::string & predefined_query_,
const CompiledRegexPtr & url_regex_, const CompiledRegexPtr & url_regex_,
const std::unordered_map<String, CompiledRegexPtr> & header_name_with_regex_, const std::unordered_map<String, CompiledRegexPtr> & header_name_with_regex_,
const std::optional<String> & content_type_override_) const HTTPResponseHeaderSetup & http_response_headers_override_)
: HTTPHandler(server_, "PredefinedQueryHandler", content_type_override_) : HTTPHandler(server_, "PredefinedQueryHandler", http_response_headers_override_)
, receive_params(receive_params_) , receive_params(receive_params_)
, predefined_query(predefined_query_) , predefined_query(predefined_query_)
, url_regex(url_regex_) , url_regex(url_regex_)
@ -1340,14 +1344,10 @@ HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server,
{ {
auto query_param_name = config.getString(config_prefix + ".handler.query_param_name", "query"); auto query_param_name = config.getString(config_prefix + ".handler.query_param_name", "query");
Before:
    std::optional<String> content_type_override;
    if (config.has(config_prefix + ".handler.content_type"))
        content_type_override = config.getString(config_prefix + ".handler.content_type");

    auto creator = [&server, query_param_name, content_type_override] () -> std::unique_ptr<DynamicQueryHandler>
    {
        return std::make_unique<DynamicQueryHandler>(server, query_param_name, content_type_override);
    };
After:
    HTTPResponseHeaderSetup http_response_headers_override = parseHTTPResponseHeaders(config, config_prefix);

    auto creator = [&server, query_param_name, http_response_headers_override]() -> std::unique_ptr<DynamicQueryHandler>
    { return std::make_unique<DynamicQueryHandler>(server, query_param_name, http_response_headers_override); };
auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(std::move(creator)); auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(std::move(creator));
factory->addFiltersFromConfig(config, config_prefix); factory->addFiltersFromConfig(config, config_prefix);
@ -1402,9 +1402,7 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server,
headers_name_with_regex.emplace(std::make_pair(header_name, regex)); headers_name_with_regex.emplace(std::make_pair(header_name, regex));
} }
Before:
    std::optional<String> content_type_override;
    if (config.has(config_prefix + ".handler.content_type"))
        content_type_override = config.getString(config_prefix + ".handler.content_type");
After:
    HTTPResponseHeaderSetup http_response_headers_override = parseHTTPResponseHeaders(config, config_prefix);
std::shared_ptr<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>> factory; std::shared_ptr<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>> factory;
@ -1424,12 +1422,12 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server,
predefined_query, predefined_query,
regex, regex,
headers_name_with_regex, headers_name_with_regex,
content_type_override] http_response_headers_override]
-> std::unique_ptr<PredefinedQueryHandler> -> std::unique_ptr<PredefinedQueryHandler>
{ {
return std::make_unique<PredefinedQueryHandler>( return std::make_unique<PredefinedQueryHandler>(
server, analyze_receive_params, predefined_query, regex, server, analyze_receive_params, predefined_query, regex,
headers_name_with_regex, content_type_override); headers_name_with_regex, http_response_headers_override);
}; };
factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>>(std::move(creator)); factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>>(std::move(creator));
factory->addFiltersFromConfig(config, config_prefix); factory->addFiltersFromConfig(config, config_prefix);
@ -1442,12 +1440,12 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server,
analyze_receive_params, analyze_receive_params,
predefined_query, predefined_query,
headers_name_with_regex, headers_name_with_regex,
content_type_override] http_response_headers_override]
-> std::unique_ptr<PredefinedQueryHandler> -> std::unique_ptr<PredefinedQueryHandler>
{ {
return std::make_unique<PredefinedQueryHandler>( return std::make_unique<PredefinedQueryHandler>(
server, analyze_receive_params, predefined_query, CompiledRegexPtr{}, server, analyze_receive_params, predefined_query, CompiledRegexPtr{},
headers_name_with_regex, content_type_override); headers_name_with_regex, http_response_headers_override);
}; };
factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>>(std::move(creator)); factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>>(std::move(creator));

View File

@@ -1,5 +1,8 @@
 #pragma once
 
+#include <optional>
+#include <string>
+#include <unordered_map>
 #include <Core/Names.h>
 #include <Server/HTTP/HTMLForm.h>
 #include <Server/HTTP/HTTPRequestHandler.h>
@@ -10,6 +13,8 @@
 #include <Compression/CompressedWriteBuffer.h>
 #include <Common/re2.h>
+#include "HTTPResponseHeaderWriter.h"
 
 namespace CurrentMetrics
 {
     extern const Metric HTTPConnection;
@@ -31,7 +36,7 @@ using CompiledRegexPtr = std::shared_ptr<const re2::RE2>;
 class HTTPHandler : public HTTPRequestHandler
 {
 public:
-    HTTPHandler(IServer & server_, const std::string & name, const std::optional<String> & content_type_override_);
+    HTTPHandler(IServer & server_, const std::string & name, const HTTPResponseHeaderSetup & http_response_headers_override_);
     ~HTTPHandler() override;
 
     void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
@@ -113,8 +118,8 @@ private:
     /// See settings http_max_fields, http_max_field_name_size, http_max_field_value_size in HTMLForm.
     const Settings & default_settings;
 
-    /// Overrides Content-Type provided by the format of the response.
-    std::optional<String> content_type_override;
+    /// Overrides for response headers.
+    HTTPResponseHeaderSetup http_response_headers_override;
 
     // session is reset at the end of each request/response.
     std::unique_ptr<Session> session;
@@ -162,8 +167,12 @@ class DynamicQueryHandler : public HTTPHandler
 {
 private:
     std::string param_name;
 
 public:
-    explicit DynamicQueryHandler(IServer & server_, const std::string & param_name_ = "query", const std::optional<String>& content_type_override_ = std::nullopt);
+    explicit DynamicQueryHandler(
+        IServer & server_,
+        const std::string & param_name_ = "query",
+        const HTTPResponseHeaderSetup & http_response_headers_override_ = std::nullopt);
 
     std::string getQuery(HTTPServerRequest & request, HTMLForm & params, ContextMutablePtr context) override;
@@ -177,11 +186,15 @@ private:
     std::string predefined_query;
     CompiledRegexPtr url_regex;
     std::unordered_map<String, CompiledRegexPtr> header_name_with_capture_regex;
 
 public:
     PredefinedQueryHandler(
-        IServer & server_, const NameSet & receive_params_, const std::string & predefined_query_
-        , const CompiledRegexPtr & url_regex_, const std::unordered_map<String, CompiledRegexPtr> & header_name_with_regex_
-        , const std::optional<std::string> & content_type_override_);
+        IServer & server_,
+        const NameSet & receive_params_,
+        const std::string & predefined_query_,
+        const CompiledRegexPtr & url_regex_,
+        const std::unordered_map<String, CompiledRegexPtr> & header_name_with_regex_,
+        const HTTPResponseHeaderSetup & http_response_headers_override_ = std::nullopt);
 
     void customizeContext(HTTPServerRequest & request, ContextMutablePtr context, ReadBuffer & body) override;

View File

@@ -74,7 +74,8 @@ static auto createPingHandlerFactory(IServer & server)
     auto creator = [&server]() -> std::unique_ptr<StaticRequestHandler>
     {
         constexpr auto ping_response_expression = "Ok.\n";
-        return std::make_unique<StaticRequestHandler>(server, ping_response_expression);
+        return std::make_unique<StaticRequestHandler>(
+            server, ping_response_expression, parseHTTPResponseHeaders("text/html; charset=UTF-8"));
     };
     return std::make_shared<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(std::move(creator));
 }
@@ -214,7 +215,8 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS
     auto root_creator = [&server]() -> std::unique_ptr<StaticRequestHandler>
     {
         constexpr auto root_response_expression = "config://http_server_default_response";
-        return std::make_unique<StaticRequestHandler>(server, root_response_expression);
+        return std::make_unique<StaticRequestHandler>(
+            server, root_response_expression, parseHTTPResponseHeaders("text/html; charset=UTF-8"));
     };
     auto root_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(std::move(root_creator));
     root_handler->attachStrictPath("/");

View File

@@ -0,0 +1,69 @@
+#include "HTTPResponseHeaderWriter.h"
+#include <unordered_map>
+#include <utility>
+#include <Poco/Net/HTTPMessage.h>
+
+namespace DB
+{
+
+std::unordered_map<String, String>
+baseParseHTTPResponseHeaders(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
+{
+    std::unordered_map<String, String> http_response_headers_override;
+    String http_response_headers_key = config_prefix + ".handler.http_response_headers";
+    String http_response_headers_key_prefix = http_response_headers_key + ".";
+    if (config.has(http_response_headers_key))
+    {
+        Poco::Util::AbstractConfiguration::Keys keys;
+        config.keys(http_response_headers_key, keys);
+        for (const auto & key : keys)
+        {
+            http_response_headers_override[key] = config.getString(http_response_headers_key_prefix + key);
+        }
+    }
+    if (config.has(config_prefix + ".handler.content_type"))
+        http_response_headers_override[Poco::Net::HTTPMessage::CONTENT_TYPE] = config.getString(config_prefix + ".handler.content_type");
+
+    return http_response_headers_override;
+}
+
+HTTPResponseHeaderSetup parseHTTPResponseHeaders(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
+{
+    std::unordered_map<String, String> http_response_headers_override = baseParseHTTPResponseHeaders(config, config_prefix);
+
+    if (http_response_headers_override.empty())
+        return {};
+
+    return std::move(http_response_headers_override);
+}
+
+std::unordered_map<String, String> parseHTTPResponseHeaders(
+    const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_content_type)
+{
+    std::unordered_map<String, String> http_response_headers_override = baseParseHTTPResponseHeaders(config, config_prefix);
+
+    if (!http_response_headers_override.contains(Poco::Net::HTTPMessage::CONTENT_TYPE))
+        http_response_headers_override[Poco::Net::HTTPMessage::CONTENT_TYPE] = default_content_type;
+
+    return http_response_headers_override;
+}
+
+std::unordered_map<String, String> parseHTTPResponseHeaders(const std::string & default_content_type)
+{
+    return {{{Poco::Net::HTTPMessage::CONTENT_TYPE, default_content_type}}};
+}
+
+void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const HTTPResponseHeaderSetup & setup)
+{
+    if (setup)
+        for (const auto & [header_name, header_value] : *setup)
+            response.set(header_name, header_value);
+}
+
+void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const std::unordered_map<String, String> & setup)
+{
+    for (const auto & [header_name, header_value] : setup)
+        response.set(header_name, header_value);
+}
+
+}
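
Taken together, the helpers above mean a handler's response headers come from two config keys, with the legacy `content_type` key applied last so it wins over an explicit `Content-Type` child. A standalone sketch of that parsing logic, using Poco only — the `<clickhouse>` root element and the `http_handlers.rule` prefix are illustrative assumptions, not taken from this diff:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <Poco/AutoPtr.h>
#include <Poco/Util/XMLConfiguration.h>

int main()
{
    // Assumed config shape: a handler with both the legacy content_type key
    // and the new http_response_headers block.
    std::istringstream xml(R"(<clickhouse>
        <http_handlers><rule><handler>
            <content_type>text/plain; charset=UTF-8</content_type>
            <http_response_headers>
                <X-My-Common-Header>it-works</X-My-Common-Header>
            </http_response_headers>
        </handler></rule></http_handlers>
    </clickhouse>)");

    Poco::AutoPtr<Poco::Util::XMLConfiguration> config(new Poco::Util::XMLConfiguration(xml));
    const std::string prefix = "http_handlers.rule";

    // Mirror of baseParseHTTPResponseHeaders(): collect the override map.
    std::unordered_map<std::string, std::string> headers;
    const std::string headers_key = prefix + ".handler.http_response_headers";
    if (config->has(headers_key))
    {
        Poco::Util::AbstractConfiguration::Keys keys;
        config->keys(headers_key, keys);
        for (const auto & key : keys)
            headers[key] = config->getString(headers_key + "." + key);
    }
    // The legacy key is applied last, so it overrides any Content-Type above.
    if (config->has(prefix + ".handler.content_type"))
        headers["Content-Type"] = config->getString(prefix + ".handler.content_type");

    for (const auto & [name, value] : headers)
        std::cout << name << ": " << value << "\n";
}
```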

View File

@@ -0,0 +1,25 @@
+#pragma once
+
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <base/types.h>
+#include <Poco/Net/HTTPResponse.h>
+#include <Poco/Util/AbstractConfiguration.h>
+
+namespace DB
+{
+
+using HTTPResponseHeaderSetup = std::optional<std::unordered_map<String, String>>;
+
+HTTPResponseHeaderSetup parseHTTPResponseHeaders(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
+
+std::unordered_map<String, String> parseHTTPResponseHeaders(
+    const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_content_type);
+
+std::unordered_map<String, String> parseHTTPResponseHeaders(const std::string & default_content_type);
+
+void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const HTTPResponseHeaderSetup & setup);
+
+void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const std::unordered_map<String, String> & setup);
+}

View File

@@ -2,7 +2,7 @@
 #include "IServer.h"
 
 #include "HTTPHandlerFactory.h"
-#include "HTTPHandlerRequestFilter.h"
+#include "HTTPResponseHeaderWriter.h"
 
 #include <IO/HTTPCommon.h>
 #include <IO/ReadBufferFromFile.h>
@@ -14,6 +14,7 @@
 #include <Common/Exception.h>
 
+#include <unordered_map>
 #include <Poco/Net/HTTPServerRequest.h>
 #include <Poco/Net/HTTPServerResponse.h>
 #include <Poco/Net/HTTPRequestHandlerFactory.h>
@@ -94,7 +95,7 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer
     try
     {
-        response.setContentType(content_type);
+        applyHTTPResponseHeaders(response, http_response_headers_override);
 
         if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1)
             response.setChunkedTransferEncoding(true);
@@ -155,8 +156,9 @@ void StaticRequestHandler::writeResponse(WriteBuffer & out)
         writeString(response_expression, out);
 }
 
-StaticRequestHandler::StaticRequestHandler(IServer & server_, const String & expression, int status_, const String & content_type_)
-    : server(server_), status(status_), content_type(content_type_), response_expression(expression)
+StaticRequestHandler::StaticRequestHandler(
+    IServer & server_, const String & expression, const std::unordered_map<String, String> & http_response_headers_override_, int status_)
+    : server(server_), status(status_), http_response_headers_override(http_response_headers_override_), response_expression(expression)
 {
 }
@@ -166,12 +168,12 @@ HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server,
 {
     int status = config.getInt(config_prefix + ".handler.status", 200);
     std::string response_content = config.getRawString(config_prefix + ".handler.response_content", "Ok.\n");
-    std::string response_content_type = config.getString(config_prefix + ".handler.content_type", "text/plain; charset=UTF-8");
-    auto creator = [&server, response_content, status, response_content_type]() -> std::unique_ptr<StaticRequestHandler>
-    {
-        return std::make_unique<StaticRequestHandler>(server, response_content, status, response_content_type);
-    };
+    std::unordered_map<String, String> http_response_headers_override
+        = parseHTTPResponseHeaders(config, config_prefix, "text/plain; charset=UTF-8");
+
+    auto creator = [&server, http_response_headers_override, response_content, status]() -> std::unique_ptr<StaticRequestHandler>
+    { return std::make_unique<StaticRequestHandler>(server, response_content, http_response_headers_override, status); };
+
     auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(std::move(creator));

View File

@@ -1,9 +1,9 @@
 #pragma once
 
+#include <unordered_map>
 #include <Server/HTTP/HTTPRequestHandler.h>
 #include <base/types.h>
 
 namespace DB
 {
@@ -17,15 +17,16 @@ private:
     IServer & server;
 
     int status;
-    String content_type;
+    /// Overrides for response headers.
+    std::unordered_map<String, String> http_response_headers_override;
     String response_expression;
 
 public:
     StaticRequestHandler(
         IServer & server,
         const String & expression,
-        int status_ = 200,
-        const String & content_type_ = "text/html; charset=UTF-8");
+        const std::unordered_map<String, String> & http_response_headers_override_,
+        int status_ = 200);
 
     void writeResponse(WriteBuffer & out);

View File

@@ -737,10 +737,10 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
     {
         /// Don't scare people with broken part error
         if (!isRetryableException(std::current_exception()))
-            LOG_ERROR(storage.log, "Part {} is broken and need manual correction", getDataPartStorage().getFullPath());
+            LOG_ERROR(storage.log, "Part {} is broken and needs manual correction", getDataPartStorage().getFullPath());
 
         // There could be conditions that data part to be loaded is broken, but some of meta infos are already written
-        // into meta data before exception, need to clean them all.
+        // into metadata before exception, need to clean them all.
         metadata_manager->deleteAll(/*include_projection*/ true);
         metadata_manager->assertAllDeleted(/*include_projection*/ true);
         throw;

View File

@@ -310,7 +310,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
     auto table_id = storage.getStorageID();
 
     task_context = Context::createCopy(storage.getContext());
-    task_context->makeQueryContext();
+    task_context->makeQueryContextForMerge(*storage.getSettings());
     task_context->setCurrentQueryId(getQueryId());
     task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE);

View File

@@ -165,7 +165,7 @@ void MergePlainMergeTreeTask::finish()
 ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const
 {
     auto context = Context::createCopy(storage.getContext());
-    context->makeQueryContext();
+    context->makeQueryContextForMerge(*storage.getSettings());
     auto queryId = getQueryId();
     context->setCurrentQueryId(queryId);
     context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE);

View File

@@ -138,7 +138,7 @@ private:
         virtual ~IStage() = default;
     };
 
-    /// By default this context is uninitialed, but some variables has to be set after construction,
+    /// By default this context is uninitialized, but some variables has to be set after construction,
     /// some variables are used in a process of execution
     /// Proper initialization is responsibility of the author
     struct GlobalRuntimeContext : public IStageRuntimeContext
@@ -199,7 +199,7 @@ private:
    using GlobalRuntimeContextPtr = std::shared_ptr<GlobalRuntimeContext>;
 
-    /// By default this context is uninitialed, but some variables has to be set after construction,
+    /// By default this context is uninitialized, but some variables has to be set after construction,
     /// some variables are used in a process of execution
     /// Proper initialization is responsibility of the author
     struct ExecuteAndFinalizeHorizontalPartRuntimeContext : public IStageRuntimeContext
@@ -273,7 +273,7 @@ private:
         GlobalRuntimeContextPtr global_ctx;
     };
 
-    /// By default this context is uninitialed, but some variables has to be set after construction,
+    /// By default this context is uninitialized, but some variables has to be set after construction,
     /// some variables are used in a process of execution
     /// Proper initialization is responsibility of the author
     struct VerticalMergeRuntimeContext : public IStageRuntimeContext
@@ -348,7 +348,7 @@ private:
         GlobalRuntimeContextPtr global_ctx;
     };
 
-    /// By default this context is uninitialed, but some variables has to be set after construction,
+    /// By default this context is uninitialized, but some variables has to be set after construction,
     /// some variables are used in a process of execution
     /// Proper initialization is responsibility of the author
     struct MergeProjectionsRuntimeContext : public IStageRuntimeContext

View File

@@ -82,6 +82,8 @@ struct Settings;
     M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
     M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
     M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \
+    M(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \
+    M(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \
     \
     /** Inserts settings. */ \
     M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \
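
These two settings pair with the makeQueryContextForMerge / makeQueryContextForMutate calls seen earlier in this diff. Presumably the per-table value takes precedence over the server-wide merge_workload / mutation_workload defaults when non-empty — a hedged sketch of that resolution order, with illustrative names (only the setting names and the ForMerge/ForMutate entry points appear in this diff):

```cpp
#include <iostream>
#include <string>

// Illustrative stand-ins; not the real MergeTreeSettings / server config types.
struct MergeTreeSettings { std::string merge_workload; };
struct ServerConfig { std::string merge_workload = "server_merges"; };

// Assumed precedence: the per-table MergeTree setting wins when set,
// otherwise the server-wide default applies.
std::string resolveMergeWorkload(const MergeTreeSettings & table, const ServerConfig & server)
{
    return table.merge_workload.empty() ? server.merge_workload : table.merge_workload;
}

int main()
{
    ServerConfig server;
    std::cout << resolveMergeWorkload(MergeTreeSettings{}, server) << "\n";      // server_merges
    std::cout << resolveMergeWorkload(MergeTreeSettings{"etl"}, server) << "\n"; // etl
}
```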

View File

@@ -204,7 +204,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare()
     }
 
     task_context = Context::createCopy(storage.getContext());
-    task_context->makeQueryContext();
+    task_context->makeQueryContextForMutate(*storage.getSettings());
     task_context->setCurrentQueryId(getQueryId());
     task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION);

View File

@@ -136,7 +136,7 @@ bool MutatePlainMergeTreeTask::executeStep()
 ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const
 {
     auto context = Context::createCopy(storage.getContext());
-    context->makeQueryContext();
+    context->makeQueryContextForMutate(*storage.getSettings());
     auto queryId = getQueryId();
     context->setCurrentQueryId(queryId);
     context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION);

View File

@@ -132,11 +132,11 @@ void WriteBufferFromHDFS::sync()
 }
 
-void WriteBufferFromHDFS::finalizeImpl()
+WriteBufferFromHDFS::~WriteBufferFromHDFS()
 {
     try
     {
-        next();
+        finalize();
     }
     catch (...)
     {
@@ -144,11 +144,5 @@ void WriteBufferFromHDFS::finalizeImpl()
     }
 }
 
-WriteBufferFromHDFS::~WriteBufferFromHDFS()
-{
-    finalize();
-}
-
 }
 
 #endif
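
The buffer now finalizes itself on destruction instead of exposing finalizeImpl(); an owner that forgets to call finalize() still gets a flushed file, and an exception during cleanup cannot escape the destructor. A generic, self-contained sketch of the pattern (not the ClickHouse WriteBuffer API):

```cpp
#include <iostream>

// Generic sketch: finalize() is idempotent, and the destructor guarantees it
// runs while never letting an exception propagate (which would terminate).
struct SelfFinalizingBuffer
{
    bool finalized = false;

    void finalize()
    {
        if (finalized)
            return;               // safe to call from both owner and destructor
        // ... flush remaining data, close the underlying handle ...
        finalized = true;
    }

    ~SelfFinalizingBuffer()
    {
        try
        {
            finalize();
        }
        catch (...)
        {
            // log and swallow: destructors must not throw
            std::cerr << "finalize() failed during destruction\n";
        }
    }
};
```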

View File

@@ -38,8 +38,6 @@ public:
     std::string getFileName() const override { return filename; }
 
 private:
-    void finalizeImpl() override;
-
     struct WriteBufferFromHDFSImpl;
     std::unique_ptr<WriteBufferFromHDFSImpl> impl;
     const std::string filename;

View File

@@ -83,7 +83,6 @@ void StorageObjectStorageSink::finalize()
     {
         writer->finalize();
         writer->flush();
-        write_buf->finalize();
     }
     catch (...)
     {
@@ -91,6 +90,8 @@
         release();
         throw;
    }
+
+    write_buf->finalize();
 }
 
 void StorageObjectStorageSink::release()
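
The same reordering is applied to StorageFile and StorageURL below: write_buf->finalize() moves out of the try block so it runs exactly once on the success path, while the failure path just releases (resets) both objects and lets the buffer's destructor handle cleanup. A condensed, self-contained sketch of the control flow (Writer/Buf are placeholders, not the real types):

```cpp
#include <memory>

struct Writer { void finalize() {} void flush() {} };
struct Buf { void finalize() {} };

struct Sink
{
    std::unique_ptr<Writer> writer = std::make_unique<Writer>();
    std::unique_ptr<Buf> write_buf = std::make_unique<Buf>();

    void finalize()
    {
        try
        {
            writer->finalize(); // may throw while flushing format-level state
            writer->flush();
        }
        catch (...)
        {
            release();          // on failure: drop everything unfinalized
            throw;
        }
        write_buf->finalize();  // success path: finalize the buffer exactly once
    }

    void release()
    {
        writer.reset();
        write_buf.reset();      // previously this called write_buf->finalize()
    }
};
```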

View File

@@ -1823,7 +1823,6 @@ private:
        {
             writer->finalize();
             writer->flush();
-            write_buf->finalize();
         }
         catch (...)
         {
@@ -1831,12 +1830,14 @@ private:
             release();
             throw;
         }
+
+        write_buf->finalize();
     }
 
     void release()
     {
         writer.reset();
-        write_buf->finalize();
+        write_buf.reset();
     }
 
     StorageMetadataPtr metadata_snapshot;

View File

@@ -609,7 +609,6 @@ void StorageURLSink::finalize()
     {
         writer->finalize();
         writer->flush();
-        write_buf->finalize();
     }
     catch (...)
     {
@@ -617,12 +616,14 @@
         release();
         throw;
     }
+
+    write_buf->finalize();
 }
 
 void StorageURLSink::release()
 {
     writer.reset();
-    write_buf->finalize();
+    write_buf.reset();
 }
 
 class PartitionedStorageURLSink : public PartitionedSink

View File

@@ -12,7 +12,6 @@
 #include <Common/Scheduler/Nodes/ThrottlerConstraint.h>
 #include <Common/Scheduler/Nodes/FifoQueue.h>
 #include <Interpreters/Context.h>
-#include "Common/Scheduler/ResourceRequest.h"
 
 namespace DB

View File

@@ -81,7 +81,10 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
         {"uncompressed_cache_size", {std::to_string(context->getUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
         {"index_mark_cache_size", {std::to_string(context->getIndexMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
         {"index_uncompressed_cache_size", {std::to_string(context->getIndexUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
-        {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}
+        {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
+        {"merge_workload", {context->getMergeWorkload(), ChangeableWithoutRestart::Yes}},
+        {"mutation_workload", {context->getMutationWorkload(), ChangeableWithoutRestart::Yes}}
     };
 
     if (context->areBackgroundExecutorsInitialized())

View File

@@ -9,3 +9,5 @@
 01287_max_execution_speed
 # Check after ConstantNode refactoring
 02154_parser_backtracking
+02944_variant_as_common_type
+02942_variant_cast

View File

@@ -15,7 +15,7 @@ from github.Commit import Commit
 from build_download_helper import download_build_with_progress
 from commit_status_helper import post_commit_status
 from compress_files import SUFFIX, compress_fast, decompress_fast
-from env_helper import CI, RUNNER_TEMP, S3_BUILDS_BUCKET
+from env_helper import IS_CI, RUNNER_TEMP, S3_BUILDS_BUCKET
 from git_helper import SHA_REGEXP
 from report import FOOTER_HTML_TEMPLATE, HEAD_HTML_TEMPLATE, SUCCESS
 from s3_helper import S3Helper
@@ -131,7 +131,7 @@ class ArtifactsHelper:
         post_commit_status(commit, SUCCESS, url, "Artifacts for workflow", "Artifacts")
 
     def _regenerate_index(self) -> None:
-        if CI:
+        if IS_CI:
             files = self._get_s3_objects()
         else:
             files = self._get_local_s3_objects()

View File

@@ -6,7 +6,7 @@ import subprocess
 import sys
 from pathlib import Path
 
-from build_download_helper import get_build_name_for_check, read_build_urls
+from build_download_helper import read_build_urls
 from clickhouse_helper import CiLogsCredentials
 from docker_images_helper import DockerImage, get_docker_image, pull_image
 from env_helper import REPORT_PATH, TEMP_PATH
@@ -14,6 +14,7 @@ from pr_info import PRInfo
 from report import FAIL, FAILURE, OK, SUCCESS, JobReport, TestResult
 from stopwatch import Stopwatch
 from tee_popen import TeePopen
+from ci_config import CI
 
 IMAGE_NAME = "clickhouse/fuzzer"
@@ -64,7 +65,7 @@ def main():
     docker_image = pull_image(get_docker_image(IMAGE_NAME))
 
-    build_name = get_build_name_for_check(check_name)
+    build_name = CI.get_required_build_name(check_name)
     urls = read_build_urls(build_name, reports_path)
     if not urls:
         raise ValueError("No build URLs found")

View File

@@ -7,7 +7,7 @@ import sys
 from pathlib import Path
 from typing import List, Sequence, Tuple
 
-from ci_config import JobNames
+from ci_config import CI
 from ci_utils import normalize_string
 from env_helper import TEMP_PATH
 from functional_test_check import NO_CHANGES_MSG
@@ -92,16 +92,19 @@ def main():
     logging.basicConfig(level=logging.INFO)
     # args = parse_args()
     stopwatch = Stopwatch()
-    jobs_to_validate = [JobNames.STATELESS_TEST_RELEASE, JobNames.INTEGRATION_TEST]
+    jobs_to_validate = [
+        CI.JobNames.STATELESS_TEST_RELEASE,
+        CI.JobNames.INTEGRATION_TEST,
+    ]
     functional_job_report_file = Path(TEMP_PATH) / "functional_test_job_report.json"
     integration_job_report_file = Path(TEMP_PATH) / "integration_test_job_report.json"
     jobs_report_files = {
-        JobNames.STATELESS_TEST_RELEASE: functional_job_report_file,
-        JobNames.INTEGRATION_TEST: integration_job_report_file,
+        CI.JobNames.STATELESS_TEST_RELEASE: functional_job_report_file,
+        CI.JobNames.INTEGRATION_TEST: integration_job_report_file,
     }
     jobs_scripts = {
-        JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py",
-        JobNames.INTEGRATION_TEST: "integration_test_check.py",
+        CI.JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py",
+        CI.JobNames.INTEGRATION_TEST: "integration_test_check.py",
     }
 
     for test_job in jobs_to_validate:

Some files were not shown because too many files have changed in this diff.