Merge branch 'master' of github.com:ClickHouse/ClickHouse into divanik/add_local_and_azure_iceberg_support

commit c4e29466de by divanik, 2024-08-06 11:59:47 +00:00

754 changed files with 2838 additions and 2295 deletions

@@ -260,13 +260,18 @@ jobs:
       - name: Finish label
         if: ${{ !failure() }}
         run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
           cd "$GITHUB_WORKSPACE/tests/ci"
           # update mergeable check
-          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+          python3 merge_pr.py --set-ci-status
           # update overall ci report
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
           python3 merge_pr.py
       - name: Check Workflow results
+        if: ${{ !cancelled() }}
         run: |
           export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
           cat > "$WORKFLOW_RESULT_FILE" << 'EOF'

@@ -64,6 +64,7 @@ jobs:
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
       - name: Check Workflow results
+        if: ${{ !cancelled() }}
         run: |
           export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
           cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'

@@ -103,9 +103,14 @@ jobs:
       - name: Check and set merge status
         if: ${{ needs.StyleCheck.result == 'success' }}
         run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
           cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+          python3 merge_pr.py --set-ci-status
       - name: Check Workflow results
+        if: ${{ !cancelled() }}
         run: |
           export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
           cat > "$WORKFLOW_RESULT_FILE" << 'EOF'

@@ -52,6 +52,7 @@ jobs:
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
       - name: Check Workflow results
+        if: ${{ !cancelled() }}
         run: |
           export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
           cat > "$WORKFLOW_RESULT_FILE" << 'EOF'

@@ -170,7 +170,11 @@ jobs:
         if: ${{ needs.StyleCheck.result == 'success' }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          python3 merge_pr.py --set-ci-status
       - name: Check Workflow results
         uses: ./.github/actions/check_workflow
         with:

@@ -481,12 +481,10 @@ jobs:
       - name: Finish label
         if: ${{ !failure() }}
         run: |
-          cd "$GITHUB_WORKSPACE/tests/ci"
-          # update mergeable check
-          python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
           # update overall ci report
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
       - name: Check Workflow results
+        if: ${{ !cancelled() }}
         run: |
           export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
           cat > "$WORKFLOW_RESULT_FILE" << 'EOF'

@@ -5,12 +5,6 @@ rules:
   indentation:
     level: warning
     indent-sequences: consistent
-  line-length:
-    # there are:
-    # - bash -c "", so this is OK
-    # - yaml in tests
-    max: 1000
-    level: warning
   comments:
     min-spaces-from-content: 1
   document-start: disable

@@ -64,6 +64,7 @@
 * The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)).
 * The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)).
+* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
 * Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)).
 * In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)).
 * Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)).

contrib/rocksdb vendored

@@ -1 +1 @@
-Subproject commit 01e43568fa9f3f7bf107b2b66c00b286b456f33e
+Subproject commit 49ce8a1064dd1ad89117899839bf136365e49e79

@@ -1,6 +1,6 @@
 option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES})
-if (NOT ENABLE_ROCKSDB)
+if (NOT ENABLE_ROCKSDB OR NO_SSE3_OR_HIGHER) # assumes SSE4.2 and PCLMUL
     message (STATUS "Not using RocksDB")
     return()
 endif()

@@ -39,13 +39,6 @@ if(WITH_ZSTD)
     list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
 endif()
-add_definitions(-DROCKSDB_PORTABLE)
-if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
-    add_definitions(-DHAVE_SSE42)
-    add_definitions(-DHAVE_PCLMUL)
-endif()
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
     set (HAS_ARMV8_CRC 1)
     # the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general

@@ -91,7 +84,9 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/cache/compressed_secondary_cache.cc
     ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
     ${ROCKSDB_SOURCE_DIR}/cache/secondary_cache.cc
+    ${ROCKSDB_SOURCE_DIR}/cache/secondary_cache_adapter.cc
     ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
+    ${ROCKSDB_SOURCE_DIR}/cache/tiered_secondary_cache.cc
     ${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc

@@ -174,9 +169,11 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc
     ${ROCKSDB_SOURCE_DIR}/db/wide/wide_column_serialization.cc
     ${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns.cc
+    ${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns_helper.cc
     ${ROCKSDB_SOURCE_DIR}/db/write_batch.cc
     ${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc
     ${ROCKSDB_SOURCE_DIR}/db/write_controller.cc
+    ${ROCKSDB_SOURCE_DIR}/db/write_stall_stats.cc
     ${ROCKSDB_SOURCE_DIR}/db/write_thread.cc
     ${ROCKSDB_SOURCE_DIR}/env/composite_env.cc
     ${ROCKSDB_SOURCE_DIR}/env/env.cc

@@ -229,6 +226,7 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/options/configurable.cc
     ${ROCKSDB_SOURCE_DIR}/options/customizable.cc
     ${ROCKSDB_SOURCE_DIR}/options/db_options.cc
+    ${ROCKSDB_SOURCE_DIR}/options/offpeak_time_info.cc
     ${ROCKSDB_SOURCE_DIR}/options/options.cc
     ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
     ${ROCKSDB_SOURCE_DIR}/options/options_parser.cc

@@ -268,6 +266,7 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/table/get_context.cc
     ${ROCKSDB_SOURCE_DIR}/table/iterator.cc
     ${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc
+    ${ROCKSDB_SOURCE_DIR}/table/compaction_merging_iterator.cc
     ${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc
     ${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc
     ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc

@@ -309,6 +308,7 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc
     ${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc
     ${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
+    ${ROCKSDB_SOURCE_DIR}/util/data_structure.cc
     ${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc
     ${ROCKSDB_SOURCE_DIR}/util/hash.cc
     ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc

@@ -322,6 +322,8 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/util/string_util.cc
     ${ROCKSDB_SOURCE_DIR}/util/thread_local.cc
     ${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc
+    ${ROCKSDB_SOURCE_DIR}/util/udt_util.cc
+    ${ROCKSDB_SOURCE_DIR}/util/write_batch_util.cc
     ${ROCKSDB_SOURCE_DIR}/util/xxhash.cc
     ${ROCKSDB_SOURCE_DIR}/utilities/agg_merge/agg_merge.cc
     ${ROCKSDB_SOURCE_DIR}/utilities/backup/backup_engine.cc

@@ -404,12 +406,6 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
     build_version.cc) # generated by hand
-if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
-    set_source_files_properties(
-        "${ROCKSDB_SOURCE_DIR}/util/crc32c.cc"
-        PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
-endif()
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
     list(APPEND SOURCES
         "${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc.c"

@@ -41,7 +41,7 @@ export FASTTEST_WORKSPACE
 export FASTTEST_SOURCE
 export FASTTEST_BUILD
 export FASTTEST_DATA
-export FASTTEST_OUT
+export FASTTEST_OUTPUT
 export PATH

 function ccache_status

@@ -28,9 +28,9 @@
     </table_function_remote_max_addresses>
     <!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
-    <allow_experimental_analyzer>
+    <enable_analyzer>
         <readonly/>
-    </allow_experimental_analyzer>
+    </enable_analyzer>
     <!-- This feature is broken, deprecated and will be removed. We don't want more reports about it -->
     <allow_experimental_object_type>

@@ -11,6 +11,7 @@ function attach_gdb_to_clickhouse()
     # explicitly ignore non-fatal signals that are used by server.
     # Number of SIGRTMIN can be determined only in runtime.
     RTMIN=$(kill -l SIGRTMIN)
+    # shellcheck disable=SC2016
     echo "
 set follow-fork-mode parent
 handle SIGHUP nostop noprint pass

@@ -24,8 +25,11 @@ handle SIG$RTMIN nostop noprint pass
 info signals
 continue
 backtrace full
-thread apply all backtrace full
 info registers
+p "top 1 KiB of the stack:"
+p/x *(uint64_t[128]*)"'$sp'"
+maintenance info sections
+thread apply all backtrace full
 disassemble /s
 up
 disassemble /s

@@ -139,9 +139,9 @@ EOL
     </table_function_remote_max_addresses>
     <!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
-    <allow_experimental_analyzer>
+    <enable_analyzer>
         <readonly/>
-    </allow_experimental_analyzer>
+    </enable_analyzer>
     <!-- This feature is broken, deprecated and will be removed. We don't want more reports about it -->
     <allow_experimental_object_type>

@@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.5.78-stable (0138248cb62) FIXME as compared to v24.5.4.49-stable (63b760955a0)
#### Improvement
* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).

@@ -118,7 +118,7 @@ And the result of interpreting the `INSERT SELECT` query is a "completed" `Query
 `InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow for modular transformations of the query.

-To address current problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline` called `QueryTree`. It is not production-ready yet, but it can be tested with the `allow_experimental_analyzer` flag.
+To address problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` has been developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional layer of abstraction between `AST` and `QueryPipeline`, called `QueryTree`. It is fully ready for use in production, but it can be turned off by setting `enable_analyzer` to `false` if needed.

 ## Functions {#functions}
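Since `enable_analyzer` is introduced in this commit as an alias of `allow_experimental_analyzer`, it behaves like any other query-level setting. A small illustrative sketch (the host, port and the `requests` client are assumptions, not part of this commit) toggling it over the ClickHouse HTTP interface:

```python
import requests  # any HTTP client works; requests is assumed to be installed

# Query-level settings such as enable_analyzer can be passed as URL parameters
# to the ClickHouse HTTP interface (default port 8123 assumed here).
query = "SELECT 1 + 1 AS x, x + 1 FORMAT PrettyCompact"

for enabled in (0, 1):
    resp = requests.get(
        "http://localhost:8123/",
        params={"query": query, "enable_analyzer": enabled},
    )
    print(f"enable_analyzer={enabled}\n{resp.text}")
```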

@@ -144,7 +144,7 @@ During projection names computation, aliases are not substituted.
 SELECT
     1 + 1 AS x,
     x + 1
-SETTINGS allow_experimental_analyzer = 0
+SETTINGS enable_analyzer = 0
 FORMAT PrettyCompact

 ┌─x─┬─plus(plus(1, 1), 1)─┐

@@ -154,7 +154,7 @@ FORMAT PrettyCompact
 SELECT
     1 + 1 AS x,
     x + 1
-SETTINGS allow_experimental_analyzer = 1
+SETTINGS enable_analyzer = 1
 FORMAT PrettyCompact

 ┌─x─┬─plus(x, 1)─┐

@@ -177,7 +177,7 @@ SELECT toTypeName(if(0, [2, 3, 4], 'String'))
 ### Heterogeneous clusters

-The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `allow_experimental_analyzer` setting values.
+The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `enable_analyzer` setting values.

 ### Mutations are interpreted by previous analyzer

@@ -4051,7 +4051,7 @@ Rewrite aggregate functions with if expression as argument when logically equivalent
 For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.

 :::note
-Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
+Supported only with experimental analyzer (`enable_analyzer = 1`).
 :::

 ## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}

@@ -115,7 +115,7 @@ ClickHouse is a full-fledged column-oriented DBMS. Data
 `InterpreterSelectQuery` uses the `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is rather messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow modular transformations of the query.

-To address the current problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline`, called `QueryTree`. It is not yet ready for production use, but it can be tested with the `allow_experimental_analyzer` flag.
+To address the problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` has been developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline`, called `QueryTree`. It is fully ready for production use, but it can be turned off by setting `enable_analyzer` to `false` if needed.

 ## Functions {#functions}

@@ -522,6 +522,9 @@
     const current_url = new URL(window.location);
     const opened_locally = location.protocol == 'file:';
+    /// Run query instantly after page is loaded if the run parameter is present.
+    const run_immediately = current_url.searchParams.has("run");
     const server_address = current_url.searchParams.get('url');
     if (server_address) {
         document.getElementById('url').value = server_address;

@@ -599,6 +602,9 @@
         const title = "ClickHouse Query: " + query;
         let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
+        if (run_immediately) {
+            history_url += "&run=1";
+        }
         if (server_address != location.origin) {
             /// Save server's address in URL if it's not identical to the address of the play UI.
             history_url += '&url=' + encodeURIComponent(server_address);

@@ -1160,6 +1166,10 @@
         });
     }
+    if (run_immediately) {
+        post();
+    }
     document.getElementById('toggle-light').onclick = function() {
         setColorTheme('light', true);
     }
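The new `run` URL parameter makes the Play UI execute the query as soon as the page loads, and it is carried over into the history URL. A hypothetical sketch of building such a link from Python (the base address is an assumption; only the `url` and `run` parameters come from this diff):

```python
from urllib.parse import urlencode

# Assumed deployment of play.html; not something defined in this commit.
play_base = "http://localhost:8123/play"
params = {
    "url": "http://localhost:8123",  # server address, as already supported by the UI
    "run": 1,                        # new: run the query immediately on load
}
print(f"{play_base}?{urlencode(params)}")
```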

@@ -1029,7 +1029,8 @@ bool Dwarf::findLocation(
     const LocationInfoMode mode,
     CompilationUnit & cu,
     LocationInfo & info,
-    std::vector<SymbolizedFrame> & inline_frames) const
+    std::vector<SymbolizedFrame> & inline_frames,
+    bool assume_in_cu_range) const
 {
     Die die = getDieAtOffset(cu, cu.first_die);
     // Partial compilation unit (DW_TAG_partial_unit) is not supported.

@@ -1041,6 +1042,11 @@ bool Dwarf::findLocation(
     std::optional<std::string_view> main_file_name;
     std::optional<uint64_t> base_addr_cu;
+    std::optional<uint64_t> low_pc;
+    std::optional<uint64_t> high_pc;
+    std::optional<bool> is_high_pc_addr;
+    std::optional<uint64_t> range_offset;
     forEachAttribute(cu, die, [&](const Attribute & attr)
     {
         switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)

@@ -1058,18 +1064,47 @@ bool Dwarf::findLocation(
             // File name of main file being compiled
             main_file_name = std::get<std::string_view>(attr.attr_value);
             break;
-        case DW_AT_low_pc:
         case DW_AT_entry_pc:
             // 2.17.1: historically DW_AT_low_pc was used. DW_AT_entry_pc was
             // introduced in DWARF3. Support either to determine the base address of
             // the CU.
             base_addr_cu = std::get<uint64_t>(attr.attr_value);
             break;
+        case DW_AT_ranges:
+            range_offset = std::get<uint64_t>(attr.attr_value);
+            break;
+        case DW_AT_low_pc:
+            low_pc = std::get<uint64_t>(attr.attr_value);
+            base_addr_cu = std::get<uint64_t>(attr.attr_value);
+            break;
+        case DW_AT_high_pc:
+            // The value of the DW_AT_high_pc attribute can be
+            // an address (DW_FORM_addr*) or an offset (DW_FORM_data*).
+            is_high_pc_addr = attr.spec.form == DW_FORM_addr || //
+                              attr.spec.form == DW_FORM_addrx || //
+                              attr.spec.form == DW_FORM_addrx1 || //
+                              attr.spec.form == DW_FORM_addrx2 || //
+                              attr.spec.form == DW_FORM_addrx3 || //
+                              attr.spec.form == DW_FORM_addrx4;
+            high_pc = std::get<uint64_t>(attr.attr_value);
+            break;
         }
         // Iterate through all attributes until find all above.
         return true;
     });

+    /// Check if the address falls inside this unit's address ranges.
+    if (!assume_in_cu_range && ((low_pc && high_pc) || range_offset))
+    {
+        bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc
+            && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
+        bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, range_offset.value(), cu.addr_size);
+        if (!pc_match && !range_match)
+        {
+            return false;
+        }
+    }
+
     if (main_file_name)
     {
         info.has_main_file = true;

@@ -1442,7 +1477,7 @@ bool Dwarf::findAddress(
         {
             return false;
         }
-        findLocation(address, mode, unit, locationInfo, inline_frames);
+        findLocation(address, mode, unit, locationInfo, inline_frames, /*assume_in_cu_range*/ true);
         return locationInfo.has_file_and_line;
     }
     else if (mode == LocationInfoMode::FAST)

@@ -1471,7 +1506,7 @@ bool Dwarf::findAddress(
         {
             continue;
         }
-        findLocation(address, mode, unit, locationInfo, inline_frames);
+        findLocation(address, mode, unit, locationInfo, inline_frames, /*assume_in_cu_range*/ false);
     }

     return locationInfo.has_file_and_line;

@@ -283,7 +283,8 @@ private:
         LocationInfoMode mode,
         CompilationUnit & cu,
         LocationInfo & info,
-        std::vector<SymbolizedFrame> & inline_frames) const;
+        std::vector<SymbolizedFrame> & inline_frames,
+        bool assume_in_cu_range) const;

     /**
      * Finds a subprogram debugging info entry that contains a given address among

@@ -193,8 +193,10 @@
     M(ReplicaPartialShutdown, "How many times Replicated table has to deinitialize its state due to session expiration in ZooKeeper. The state is reinitialized every time when ZooKeeper is available again.") \
     \
     M(SelectedParts, "Number of data parts selected to read from a MergeTree table.") \
+    M(SelectedPartsTotal, "Number of total data parts before selecting which ones to read from a MergeTree table.") \
     M(SelectedRanges, "Number of (non-adjacent) ranges in all data parts selected to read from a MergeTree table.") \
     M(SelectedMarks, "Number of marks (index granules) selected to read from a MergeTree table.") \
+    M(SelectedMarksTotal, "Number of total marks (index granules) before selecting which ones to read from a MergeTree table.") \
     M(SelectedRows, "Number of rows SELECTed from all tables.") \
     M(SelectedBytes, "Number of bytes (uncompressed; for columns as they stored in memory) SELECTed from all tables.") \
     M(RowsReadByMainReader, "Number of rows read from MergeTree tables by the main reader (after PREWHERE step).") \

@@ -23,6 +23,7 @@
 #if USE_ROCKSDB
 #include <rocksdb/table.h>
 #include <rocksdb/convenience.h>
+#include <rocksdb/statistics.h>
 #include <rocksdb/utilities/db_ttl.h>
 #endif

@@ -88,7 +89,7 @@ static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractCo
     if (config.has("keeper_server.rocksdb.options"))
     {
         auto config_options = getOptionsFromConfig(config, "keeper_server.rocksdb.options");
-        status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged);
+        status = rocksdb::GetDBOptionsFromMap({}, merged, config_options, &merged);
         if (!status.ok())
         {
             throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' : {}",

@@ -98,7 +99,7 @@ static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractCo
     if (config.has("rocksdb.column_family_options"))
     {
         auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options");
-        status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged);
+        status = rocksdb::GetColumnFamilyOptionsFromMap({}, merged, column_family_options, &merged);
         if (!status.ok())
         {
             throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}", status.ToString());

@@ -107,7 +108,7 @@ static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractCo
     if (config.has("rocksdb.block_based_table_options"))
     {
         auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options");
-        status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options);
+        status = rocksdb::GetBlockBasedTableOptionsFromMap({}, table_options, block_based_table_options, &table_options);
         if (!status.ok())
         {
             throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}", status.ToString());

@@ -111,10 +111,13 @@ public:
     }
 };

-using Implementation = testing::Types<TestParam<DB::KeeperMemoryStorage, true>,
-    TestParam<DB::KeeperMemoryStorage, false>,
-    TestParam<DB::KeeperRocksStorage, true>,
-    TestParam<DB::KeeperRocksStorage, false>>;
+using Implementation = testing::Types<TestParam<DB::KeeperMemoryStorage, true>
+    ,TestParam<DB::KeeperMemoryStorage, false>
+#if USE_ROCKSDB
+    ,TestParam<DB::KeeperRocksStorage, true>
+    ,TestParam<DB::KeeperRocksStorage, false>
+#endif
+    >;

 TYPED_TEST_SUITE(CoordinationTest, Implementation);

 TYPED_TEST(CoordinationTest, RaftServerConfigParse)

@@ -944,8 +944,7 @@ class IColumn;
     \
     M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
     \
-    /* Analyzer: It's not experimental anymore (WIP) */ \
-    M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \
+    M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) ALIAS(enable_analyzer) \
     M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \
     \
     M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \

@@ -1127,7 +1126,6 @@ class IColumn;
     M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
     M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
     M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \
-    M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \
     M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \
     M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
     M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \

@@ -338,6 +338,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
             {"input_format_json_max_depth", 1000000, 1000, "It was unlimited in previous versions, but that was unsafe."},
             {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"},
             {"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
+            {"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
         }
     },
     {"24.7",

@@ -349,7 +350,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
             {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"},
             {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"},
             {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."},
-            {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."},
             {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
             {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."},
             {"collect_hash_table_stats_during_joins", false, true, "New setting."},

@@ -151,7 +151,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
     format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
     format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
     format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
-    format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching;
     format_settings.null_as_default = settings.input_format_null_as_default;
     format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields;
     format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;

@@ -234,7 +234,6 @@ struct FormatSettings
         bool infer_incomplete_types_as_strings = true;
         bool throw_on_bad_escape_sequence = true;
         bool ignore_unnecessary_fields = true;
-        bool case_insensitive_column_matching = false;
     } json{};

     struct

@@ -42,11 +42,11 @@ public:
     {
         FunctionArgumentDescriptors mandatory_args{
             {"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String"},
-            {"offset", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8, (U)Int16, (U)Int32, (U)Int64 or Float"},
+            {"offset", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8/16/32/64 or Float"},
         };

         FunctionArgumentDescriptors optional_args{
-            {"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8, (U)Int16, (U)Int32, (U)Int64 or Float"},
+            {"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8/16/32/64 or Float"},
         };

         validateFunctionArguments(*this, arguments, mandatory_args, optional_args);

@@ -1233,6 +1233,12 @@ public:
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                 "Must be ColumnUInt8 or ColumnConstUInt8.", arg_cond.column->getName(), getName());

+        /// If result is Variant, always use generic implementation.
+        /// Using typed implementations may lead to incorrect result column type when
+        /// resulting Variant is created by use_variant_when_no_common_type.
+        if (isVariant(result_type))
+            return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type);
+
         auto call = [&](const auto & types) -> bool
         {
             using Types = std::decay_t<decltype(types)>;

View File

@ -100,6 +100,13 @@ bool isConstantFromScalarSubquery(const ActionsDAG::Node * node)
} }
bool ActionsDAG::Node::isDeterministic() const
{
bool deterministic_if_func = type != ActionType::FUNCTION || function_base->isDeterministic();
bool deterministic_if_const = type != ActionType::COLUMN || is_deterministic_constant;
return deterministic_if_func && deterministic_if_const;
}
void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const
{ {
map.add("Node Type", magic_enum::enum_name(type)); map.add("Node Type", magic_enum::enum_name(type));
@ -318,7 +325,6 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl(
node.function_base = function_base; node.function_base = function_base;
node.result_type = result_type; node.result_type = result_type;
node.function = node.function_base->prepare(arguments); node.function = node.function_base->prepare(arguments);
node.is_deterministic = node.function_base->isDeterministic();
/// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function.
if (node.function_base->isSuitableForConstantFolding()) if (node.function_base->isSuitableForConstantFolding())
@ -536,64 +542,99 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_consta
void ActionsDAG::removeUnusedActions(const std::unordered_set<const Node *> & used_inputs, bool allow_constant_folding) void ActionsDAG::removeUnusedActions(const std::unordered_set<const Node *> & used_inputs, bool allow_constant_folding)
{ {
std::unordered_set<const Node *> visited_nodes; NodeRawConstPtrs roots;
std::stack<Node *> stack; roots.reserve(outputs.size() + used_inputs.size());
roots = outputs;
for (const auto * node : outputs)
{
visited_nodes.insert(node);
stack.push(const_cast<Node *>(node));
}
for (auto & node : nodes) for (auto & node : nodes)
{ {
/// We cannot remove arrayJoin because it changes the number of rows. /// We cannot remove arrayJoin because it changes the number of rows.
bool is_array_join = node.type == ActionType::ARRAY_JOIN; if (node.type == ActionType::ARRAY_JOIN)
roots.push_back(&node);
if (is_array_join && !visited_nodes.contains(&node))
{
visited_nodes.insert(&node);
stack.push(&node);
}
if (node.type == ActionType::INPUT && used_inputs.contains(&node)) if (node.type == ActionType::INPUT && used_inputs.contains(&node))
visited_nodes.insert(&node); roots.push_back(&node);
}
std::unordered_set<const Node *> required_nodes;
std::unordered_set<const Node *> non_deterministic_nodes;
struct Frame
{
const ActionsDAG::Node * node;
size_t next_child_to_visit = 0;
};
std::stack<Frame> stack;
enum class VisitStage { NonDeterministic, Required };
for (auto stage : {VisitStage::NonDeterministic, VisitStage::Required})
{
required_nodes.clear();
for (const auto * root : roots)
{
if (!required_nodes.contains(root))
{
required_nodes.insert(root);
stack.push({.node = root});
} }
while (!stack.empty()) while (!stack.empty())
{ {
auto * node = stack.top(); auto & frame = stack.top();
stack.pop(); auto * node = const_cast<Node *>(frame.node);
/// Constant folding. while (frame.next_child_to_visit < node->children.size())
if (allow_constant_folding && !node->children.empty() && node->column && isColumnConst(*node->column))
{ {
node->type = ActionsDAG::ActionType::COLUMN; const auto * child = node->children[frame.next_child_to_visit];
++frame.next_child_to_visit;
for (const auto & child : node->children) if (!required_nodes.contains(child))
{ {
if (!child->is_deterministic) required_nodes.insert(child);
{ stack.push({.node = child});
node->is_deterministic = false;
break; break;
} }
} }
node->children.clear(); if (stack.top().node != node)
} continue;
stack.pop();
if (stage == VisitStage::Required)
continue;
if (!node->isDeterministic())
non_deterministic_nodes.insert(node);
else
{
for (const auto * child : node->children) for (const auto * child : node->children)
{ {
if (!visited_nodes.contains(child)) if (non_deterministic_nodes.contains(child))
{ {
stack.push(const_cast<Node *>(child)); non_deterministic_nodes.insert(node);
visited_nodes.insert(child); break;
} }
} }
} }
std::erase_if(nodes, [&](const Node & node) { return !visited_nodes.contains(&node); }); /// Constant folding.
std::erase_if(inputs, [&](const Node * node) { return !visited_nodes.contains(node); }); if (allow_constant_folding && !node->children.empty()
&& node->column && isColumnConst(*node->column))
{
node->type = ActionsDAG::ActionType::COLUMN;
node->children.clear();
node->is_deterministic_constant = !non_deterministic_nodes.contains(node);
}
}
}
}
std::erase_if(nodes, [&](const Node & node) { return !required_nodes.contains(&node); });
std::erase_if(inputs, [&](const Node * node) { return !required_nodes.contains(node); });
} }
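
The rewritten removeUnusedActions replaces the simple visited-set walk with an explicit post-order DFS (a Frame remembers which child to descend into next), run in two stages: first propagating non-determinism from children to parents, then collecting required nodes. A self-contained sketch of just the traversal pattern on a toy DAG; the node type and handling are simplified stand-ins, not the ActionsDAG classes:

#include <cstddef>
#include <iostream>
#include <stack>
#include <unordered_set>
#include <vector>

struct ToyNode
{
    const char * name;
    std::vector<const ToyNode *> children;
};

/// Post-order DFS with an explicit stack: a node is popped only after all of its
/// children have been visited, so child results are available when the parent is handled.
void postOrder(const std::vector<const ToyNode *> & roots)
{
    struct Frame
    {
        const ToyNode * node;
        size_t next_child_to_visit = 0;
    };

    std::unordered_set<const ToyNode *> visited;
    std::stack<Frame> stack;

    for (const auto * root : roots)
    {
        if (!visited.insert(root).second)
            continue;
        stack.push({root});

        while (!stack.empty())
        {
            auto & frame = stack.top();
            const ToyNode * node = frame.node;

            /// Descend into the first not-yet-visited child, if any.
            while (frame.next_child_to_visit < node->children.size())
            {
                const ToyNode * child = node->children[frame.next_child_to_visit];
                ++frame.next_child_to_visit;
                if (visited.insert(child).second)
                {
                    stack.push({child});
                    break;
                }
            }

            /// A child was pushed; handle it first.
            if (stack.top().node != node)
                continue;

            stack.pop();
            std::cout << node->name << '\n';   /// post-order position: children already printed
        }
    }
}

int main()
{
    ToyNode a{"a", {}}, b{"b", {&a}}, c{"c", {&a, &b}};
    postOrder({&c});   /// prints: a b c
    return 0;
}
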
@ -1379,7 +1420,7 @@ bool ActionsDAG::trivial() const
void ActionsDAG::assertDeterministic() const void ActionsDAG::assertDeterministic() const
{ {
for (const auto & node : nodes) for (const auto & node : nodes)
if (!node.is_deterministic) if (!node.isDeterministic())
throw Exception(ErrorCodes::BAD_ARGUMENTS, throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name); "Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name);
} }
@ -1387,7 +1428,7 @@ void ActionsDAG::assertDeterministic() const
bool ActionsDAG::hasNonDeterministic() const bool ActionsDAG::hasNonDeterministic() const
{ {
for (const auto & node : nodes) for (const auto & node : nodes)
if (!node.is_deterministic) if (!node.isDeterministic())
return true; return true;
return false; return false;
} }

View File

@ -80,13 +80,15 @@ public:
ExecutableFunctionPtr function; ExecutableFunctionPtr function;
/// If function is a compiled statement. /// If function is a compiled statement.
bool is_function_compiled = false; bool is_function_compiled = false;
/// It is deterministic (See IFunction::isDeterministic).
/// This property is kept after constant folding of non-deterministic functions like 'now', 'today'.
bool is_deterministic = true;
/// It is a constant calculated from deterministic functions (See IFunction::isDeterministic).
/// This property is kept after constant folding of non-deterministic functions like 'now', 'today'.
bool is_deterministic_constant = true;
/// For COLUMN node and propagated constants. /// For COLUMN node and propagated constants.
ColumnPtr column; ColumnPtr column;
/// Whether the result of this node is deterministic. Checks only this node, not a subtree.
bool isDeterministic() const;
void toTree(JSONBuilder::JSONMap & map) const; void toTree(JSONBuilder::JSONMap & map) const;
}; };
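
To illustrate why the flag was renamed: after constant folding, a COLUMN node produced from now() must still be reported as non-deterministic, even though the node itself is now just a constant. A self-contained toy model of the two checks combined by isDeterministic() above; names mirror the hunk, but this is not the real ActionsDAG::Node:

#include <cassert>

/// Toy model: a node is non-deterministic either because it is a FUNCTION whose
/// implementation is non-deterministic (now(), today(), ...), or because it is a
/// COLUMN that was constant-folded from such a function and therefore carries
/// is_deterministic_constant == false.
struct ToyNode
{
    enum class Type { Input, Function, Column };

    Type type = Type::Input;
    bool function_is_deterministic = true;   /// meaningful only for Function nodes
    bool is_deterministic_constant = true;   /// meaningful only for Column nodes

    bool isDeterministic() const
    {
        bool deterministic_if_func = type != Type::Function || function_is_deterministic;
        bool deterministic_if_const = type != Type::Column || is_deterministic_constant;
        return deterministic_if_func && deterministic_if_const;
    }
};

int main()
{
    ToyNode now_call{ToyNode::Type::Function, /*function_is_deterministic=*/ false, true};
    assert(!now_call.isDeterministic());

    /// Constant folding turns the node into a COLUMN but keeps the taint,
    /// so assertDeterministic()-style checks still reject the expression.
    now_call.type = ToyNode::Type::Column;
    now_call.is_deterministic_constant = false;
    assert(!now_call.isDeterministic());
    return 0;
}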

View File

@ -326,6 +326,8 @@ std::vector<FileSegment::Range> FileCache::splitRange(size_t offset, size_t size
/// ^ ^ /// ^ ^
/// right offset aligned_right_offset /// right offset aligned_right_offset
/// [_________] <-- last cached file segment, e.g. we have uncovered suffix of the requested range /// [_________] <-- last cached file segment, e.g. we have uncovered suffix of the requested range
/// ^
/// last_file_segment_right_offset
/// [________________] /// [________________]
/// size /// size
/// [____________________________________] /// [____________________________________]
@ -336,8 +338,9 @@ std::vector<FileSegment::Range> FileCache::splitRange(size_t offset, size_t size
/// ///
/// [________________________] /// [________________________]
/// ^ ^ /// ^ ^
/// right_offset right_offset + max_file_segment_size /// | last_file_segment_right_offset + max_file_segment_size
/// e.g. there is no need to create sub-segment for range (right_offset + max_file_segment_size, aligned_right_offset]. /// last_file_segment_right_offset
/// e.g. there is no need to create sub-segment for range (last_file_segment_right_offset + max_file_segment_size, aligned_right_offset].
/// Because its left offset would be bigger than right_offset. /// Because its left offset would be bigger than right_offset.
/// Therefore, we set end_pos_non_included as offset+size, but remaining_size as aligned_size. /// Therefore, we set end_pos_non_included as offset+size, but remaining_size as aligned_size.
@ -557,7 +560,7 @@ FileCache::getOrSet(
FileSegment::Range initial_range(offset, offset + size - 1); FileSegment::Range initial_range(offset, offset + size - 1);
/// result_range is initial range, which will be adjusted according to /// result_range is initial range, which will be adjusted according to
/// 1. aligned offset, alighed_end_offset /// 1. aligned_offset, aligned_end_offset
/// 2. max_file_segments_limit /// 2. max_file_segments_limit
FileSegment::Range result_range = initial_range; FileSegment::Range result_range = initial_range;
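
The comments above describe how a requested byte range is cut into file segments of at most max_file_segment_size, possibly capped by max_file_segments_limit. A self-contained sketch of that splitting arithmetic, simplified by ignoring alignment to already-cached segments (which is what the diagram in the hunk is about):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

/// Split [offset, offset + size) into consecutive sub-ranges of at most
/// max_file_segment_size bytes, stopping after max_file_segments_limit ranges
/// (0 means "no limit"). Ranges are [left, right] with inclusive right,
/// matching the FileSegment::Range convention used above.
std::vector<std::pair<size_t, size_t>> splitRange(
    size_t offset, size_t size, size_t max_file_segment_size, size_t max_file_segments_limit)
{
    std::vector<std::pair<size_t, size_t>> ranges;
    size_t current = offset;
    size_t end = offset + size;   /// non-inclusive

    while (current < end)
    {
        if (max_file_segments_limit && ranges.size() == max_file_segments_limit)
            break;   /// the real cache trims the requested range instead of covering it fully

        size_t right = std::min(current + max_file_segment_size, end);
        ranges.emplace_back(current, right - 1);
        current = right;
    }
    return ranges;
}

int main()
{
    /// A 10 MiB read with 4 MiB segments -> [0, 4 MiB), [4 MiB, 8 MiB), [8 MiB, 10 MiB).
    for (auto [left, right] : splitRange(0, 10 << 20, 4 << 20, /*max_file_segments_limit=*/ 0))
        std::cout << left << " .. " << right << '\n';
    return 0;
}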

View File

@ -102,7 +102,7 @@ public:
/// ///
/// @param allow_duplicates_in_input - actions are allowed to have /// @param allow_duplicates_in_input - actions are allowed to have
/// duplicated input (that will refer into the block). This is needed for /// duplicated input (that will refer into the block). This is needed for
/// preliminary query filtering (filterBlockWithDAG()), because they just /// preliminary query filtering (filterBlockWithExpression()), because they just
/// pass available virtual columns, which cannot be moved in case they are /// pass available virtual columns, which cannot be moved in case they are
/// used multiple times. /// used multiple times.
void execute(Block & block, size_t & num_rows, bool dry_run = false, bool allow_duplicates_in_input = false) const; void execute(Block & block, size_t & num_rows, bool dry_run = false, bool allow_duplicates_in_input = false) const;

View File

@ -90,9 +90,18 @@ private:
using FutureSetFromTuplePtr = std::shared_ptr<FutureSetFromTuple>; using FutureSetFromTuplePtr = std::shared_ptr<FutureSetFromTuple>;
/// Set from subquery can be built inplace for PK or in CreatingSet step. /// Set from subquery can be filled (by running the subquery) in one of two ways:
/// If use_index_for_in_with_subqueries_max_values is reached, set for PK won't be created, /// 1. During query analysis. Specifically, inside `SourceStepWithFilter::applyFilters()`.
/// but ordinary set would be created instead. /// Useful if the query plan depends on the set contents, e.g. to determine which files to read.
/// 2. During query execution. This is the preferred way.
/// Sets are created by CreatingSetStep, which runs before other steps.
/// Be careful: to build the set during query analysis, the `buildSetInplace()` call must happen
/// inside `SourceStepWithFilter::applyFilters()`. Calling it later, e.g. from `initializePipeline()`,
/// will result in LOGICAL_ERROR "Not-ready Set is passed" (because a CreatingSetStep was already
/// added to pipeline but hasn't executed yet).
///
/// If use_index_for_in_with_subqueries_max_values is reached, the built set won't be suitable for
/// key analysis, but will work with function IN (the set will contain only hashes of elements).
class FutureSetFromSubquery final : public FutureSet class FutureSetFromSubquery final : public FutureSet
{ {
public: public:
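
A sketch of option 1 from the comment above, i.e. making a subquery set available already during plan analysis. It assumes a SourceStepWithFilter subclass; the class name, sample block and member names are illustrative, but the helpers are the ones used elsewhere in this diff. The important part is that set building is triggered from applyFilters(), not from initializePipeline():

/// Inside a hypothetical ReadFromSomething : public SourceStepWithFilter.
void ReadFromSomething::applyFilters(ActionDAGNodes added_filter_nodes)
{
    SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));

    if (!filter_actions_dag)
        return;

    /// Keep only the conjuncts that reference the columns we can pre-filter on.
    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(
        filter_actions_dag->getOutputs().at(0), &sample_block_with_virtual_columns);

    if (dag)
    {
        /// Build subquery sets now; doing this later (e.g. in initializePipeline())
        /// would hit "Not-ready Set is passed", as explained above.
        VirtualColumnUtils::buildSetsForDAG(*dag, context);
        virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
    }
}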

View File

@ -690,6 +690,12 @@ void validateAnalyzerSettings(ASTPtr ast, bool context_value)
if (top_level != value->safeGet<bool>()) if (top_level != value->safeGet<bool>())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'allow_experimental_analyzer' is changed in the subquery. Top level value: {}", top_level); throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'allow_experimental_analyzer' is changed in the subquery. Top level value: {}", top_level);
} }
if (auto * value = set_query->changes.tryGet("enable_analyzer"))
{
if (top_level != value->safeGet<bool>())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'enable_analyzer' is changed in the subquery. Top level value: {}", top_level);
}
} }
for (auto child : node->children) for (auto child : node->children)
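
The added block mirrors the existing allow_experimental_analyzer check for the newer enable_analyzer alias. A condensed sketch of the duplicated check, hypothetically factored into a helper; it assumes set_query is the ASTSetQuery node used above, and the accessors are the ones the hunk already calls:

/// Hypothetical helper doing what the two copy-pasted blocks above do.
static void checkAnalyzerSettingUnchanged(const ASTSetQuery & set_query, const String & name, bool top_level)
{
    if (const auto * value = set_query.changes.tryGet(name))
        if (top_level != value->safeGet<bool>())
            throw Exception(ErrorCodes::INCORRECT_QUERY,
                "Setting '{}' is changed in the subquery. Top level value: {}", name, top_level);
}

/// Usage inside validateAnalyzerSettings():
///   checkAnalyzerSettingUnchanged(*set_query, "allow_experimental_analyzer", top_level);
///   checkAnalyzerSettingUnchanged(*set_query, "enable_analyzer", top_level);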

View File

@ -323,9 +323,7 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
if (children.size() > 1)
settings.ostr << "("; settings.ostr << "(";
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{ {
if (it != children.begin()) if (it != children.begin())
@ -333,8 +331,6 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
(*it)->formatImpl(settings, state, frame); (*it)->formatImpl(settings, state, frame);
} }
if (children.size() > 1)
settings.ostr << ")"; settings.ostr << ")";
} }

View File

@ -1099,7 +1099,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(
auto query_tree = subquery->detachQueryTree(); auto query_tree = subquery->detachQueryTree();
auto subquery_options = select_query_options.subquery(); auto subquery_options = select_query_options.subquery();
/// I don't know if this is a good decision, /// I don't know if this is a good decision,
/// But for now it is done in the same way as in old analyzer. /// but for now it is done in the same way as in old analyzer.
/// This would not ignore limits for subqueries (affects mutations only). /// This would not ignore limits for subqueries (affects mutations only).
/// See test_build_sets_from_multiple_threads-analyzer. /// See test_build_sets_from_multiple_threads-analyzer.
subquery_options.ignore_limits = false; subquery_options.ignore_limits = false;

View File

@ -46,15 +46,6 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat(
{ {
const auto & header = getPort().getHeader(); const auto & header = getPort().getHeader();
name_map = header.getNamesToIndexesMap(); name_map = header.getNamesToIndexesMap();
if (format_settings_.json.case_insensitive_column_matching)
{
for (auto & it : name_map)
{
StringRef key = it.first;
String lower_case_key = transformFieldNameToLowerCase(key);
lower_case_name_map[lower_case_key] = key;
}
}
if (format_settings_.import_nested_json) if (format_settings_.import_nested_json)
{ {
for (size_t i = 0; i != header.columns(); ++i) for (size_t i = 0; i != header.columns(); ++i)
@ -180,15 +171,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns)
skipUnknownField(name_ref); skipUnknownField(name_ref);
continue; continue;
} }
size_t column_index = 0; const size_t column_index = columnIndex(name_ref, key_index);
if (format_settings.json.case_insensitive_column_matching)
{
String lower_case_name = transformFieldNameToLowerCase(name_ref);
StringRef field_name_ref = lower_case_name_map[lower_case_name];
column_index = columnIndex(field_name_ref, key_index);
}
else
column_index = columnIndex(name_ref, key_index);
if (unlikely(ssize_t(column_index) < 0)) if (unlikely(ssize_t(column_index) < 0))
{ {

View File

@ -55,13 +55,7 @@ private:
virtual void readRowStart(MutableColumns &) {} virtual void readRowStart(MutableColumns &) {}
virtual void skipRowStart() {} virtual void skipRowStart() {}
String transformFieldNameToLowerCase(const StringRef & field_name)
{
String field_name_str = field_name.toString();
std::transform(field_name_str.begin(), field_name_str.end(), field_name_str.begin(),
[](unsigned char c) { return std::tolower(c); });
return field_name_str;
}
/// Buffer for the read from the stream field name. Used when you have to copy it. /// Buffer for the read from the stream field name. Used when you have to copy it.
/// Also, if processing of Nested data is in progress, it holds the common prefix /// Also, if processing of Nested data is in progress, it holds the common prefix
/// of the nested column names (so that appending the field name to it produces /// of the nested column names (so that appending the field name to it produces
@ -80,8 +74,7 @@ private:
/// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map. /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map.
Block::NameMap name_map; Block::NameMap name_map;
/// Hash table match `lower_case field name -> field name in the block`.
std::unordered_map<String, StringRef> lower_case_name_map;
/// Cached search results for previous row (keyed as index in JSON object) - used as a hint. /// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
std::vector<Block::NameMap::const_iterator> prev_positions; std::vector<Block::NameMap::const_iterator> prev_positions;

View File

@ -119,8 +119,10 @@ bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs)
namespace ProfileEvents namespace ProfileEvents
{ {
extern const Event SelectedParts; extern const Event SelectedParts;
extern const Event SelectedPartsTotal;
extern const Event SelectedRanges; extern const Event SelectedRanges;
extern const Event SelectedMarks; extern const Event SelectedMarks;
extern const Event SelectedMarksTotal;
extern const Event SelectQueriesWithPrimaryKeyUsage; extern const Event SelectQueriesWithPrimaryKeyUsage;
} }
@ -1970,8 +1972,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
} }
ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts);
ProfileEvents::increment(ProfileEvents::SelectedPartsTotal, result.total_parts);
ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges); ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges);
ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks); ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks);
ProfileEvents::increment(ProfileEvents::SelectedMarksTotal, result.total_marks_pk);
auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result, context); auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result, context);
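
The two new counters follow the usual ProfileEvents pattern: declare the events where they are reported (as in the extern block above) and bump them with ProfileEvents::increment. A minimal sketch of that pattern; the actual registration lives in the central event table in ProfileEvents.cpp, which is not part of this diff:

namespace ProfileEvents
{
    /// Declarations local to the translation unit that reports the events.
    extern const Event SelectedPartsTotal;
    extern const Event SelectedMarksTotal;
}

/// After part/mark selection, report both the selected and the total amounts,
/// so the ratio shows how effective partition pruning and the primary key were:
ProfileEvents::increment(ProfileEvents::SelectedPartsTotal, result.total_parts);
ProfileEvents::increment(ProfileEvents::SelectedMarksTotal, result.total_marks_pk);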

View File

@ -1,4 +1,5 @@
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h> #include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
namespace DB namespace DB
{ {
@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
Columns IMergeTreeDataPartWriter::releaseIndexColumns() Columns IMergeTreeDataPartWriter::releaseIndexColumns()
{ {
return Columns( /// The memory for index was allocated without thread memory tracker.
std::make_move_iterator(index_columns.begin()), /// We need to deallocate it in shrinkToFit without memory tracker as well.
std::make_move_iterator(index_columns.end())); MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
Columns result;
result.reserve(index_columns.size());
for (auto & column : index_columns)
{
column->shrinkToFit();
result.push_back(std::move(column));
}
index_columns.clear();
return result;
} }
SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const
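
The fix above pairs the untracked allocation of the index columns with an equally untracked shrinkToFit/deallocation. A self-contained toy model of why the pairing matters; the real MemoryTrackerBlockerInThread is a thread-local RAII blocker, modeled here with a plain flag:

#include <cassert>
#include <cstdint>

/// Toy memory tracker: allocations/deallocations are only accounted while tracking
/// is not blocked in the current thread.
thread_local bool tracking_blocked = false;
thread_local std::int64_t tracked_bytes = 0;

struct BlockerModel
{
    bool previous;
    BlockerModel() : previous(tracking_blocked) { tracking_blocked = true; }
    ~BlockerModel() { tracking_blocked = previous; }
};

void onAllocate(std::int64_t bytes) { if (!tracking_blocked) tracked_bytes += bytes; }
void onFree(std::int64_t bytes)     { if (!tracking_blocked) tracked_bytes -= bytes; }

int main()
{
    {
        BlockerModel blocker;       /// index columns are filled with tracking blocked...
        onAllocate(1 << 20);
    }

    /// ...so freeing them with tracking enabled would drive the counter negative,
    /// i.e. the tracker would "forget" a megabyte it never recorded.
    onFree(1 << 20);
    assert(tracked_bytes == -(1 << 20));

    tracked_bytes = 0;
    {
        BlockerModel blocker;       /// correct: free (shrinkToFit) under the blocker as well
        onAllocate(1 << 20);
        onFree(1 << 20);
    }
    assert(tracked_bytes == 0);
    return 0;
}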

View File

@ -566,6 +566,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
} }
const ActionsDAG::Node * res = nullptr; const ActionsDAG::Node * res = nullptr;
bool handled_inversion = false;
switch (node.type) switch (node.type)
{ {
@ -582,7 +583,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
/// Re-generate column name for constant. /// Re-generate column name for constant.
/// DAG form query (with enabled analyzer) uses suffixes for constants, like 1_UInt8. /// DAG form query (with enabled analyzer) uses suffixes for constants, like 1_UInt8.
/// DAG from PK does not use it. This breaks matching by column name sometimes. /// DAG from PK does not use it. This breaks matching by column name sometimes.
/// Ideally, we should not compare manes, but DAG subtrees instead. /// Ideally, we should not compare names, but DAG subtrees instead.
name = ASTLiteral(column_const->getDataColumn()[0]).getColumnName(); name = ASTLiteral(column_const->getDataColumn()[0]).getColumnName();
else else
name = node.result_name; name = node.result_name;
@ -593,9 +594,9 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
case (ActionsDAG::ActionType::ALIAS): case (ActionsDAG::ActionType::ALIAS):
{ {
/// Ignore aliases /// Ignore aliases
const auto & alias = cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, need_inversion); res = &cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, need_inversion);
to_inverted[&node] = &alias; handled_inversion = true;
return alias; break;
} }
case (ActionsDAG::ActionType::ARRAY_JOIN): case (ActionsDAG::ActionType::ARRAY_JOIN):
{ {
@ -608,20 +609,10 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
auto name = node.function_base->getName(); auto name = node.function_base->getName();
if (name == "not") if (name == "not")
{ {
const auto & arg = cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, !need_inversion); res = &cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, !need_inversion);
to_inverted[&node] = &arg; handled_inversion = true;
return arg;
} }
else if (name == "indexHint")
if (name == "materialize")
{
/// Ignore materialize
const auto & arg = cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, need_inversion);
to_inverted[&node] = &arg;
return arg;
}
if (name == "indexHint")
{ {
ActionsDAG::NodeRawConstPtrs children; ActionsDAG::NodeRawConstPtrs children;
if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node.function_base.get())) if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node.function_base.get()))
@ -636,12 +627,10 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
} }
} }
const auto & func = inverted_dag.addFunction(node.function_base, children, ""); res = &inverted_dag.addFunction(node.function_base, children, "");
to_inverted[&node] = &func; handled_inversion = true;
return func;
} }
else if (need_inversion && (name == "and" || name == "or"))
if (need_inversion && (name == "and" || name == "or"))
{ {
ActionsDAG::NodeRawConstPtrs children(node.children); ActionsDAG::NodeRawConstPtrs children(node.children);
@ -659,11 +648,11 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
/// We match columns by name, so it is important to fill name correctly. /// We match columns by name, so it is important to fill name correctly.
/// So, use empty string to make it automatically. /// So, use empty string to make it automatically.
const auto & func = inverted_dag.addFunction(function_builder, children, ""); res = &inverted_dag.addFunction(function_builder, children, "");
to_inverted[&node] = &func; handled_inversion = true;
return func;
} }
else
{
ActionsDAG::NodeRawConstPtrs children(node.children); ActionsDAG::NodeRawConstPtrs children(node.children);
for (auto & arg : children) for (auto & arg : children)
@ -674,17 +663,41 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
{ {
const auto & func_name = need_inversion ? it->second : it->first; const auto & func_name = need_inversion ? it->second : it->first;
auto function_builder = FunctionFactory::instance().get(func_name, context); auto function_builder = FunctionFactory::instance().get(func_name, context);
const auto & func = inverted_dag.addFunction(function_builder, children, ""); res = &inverted_dag.addFunction(function_builder, children, "");
to_inverted[&node] = &func; handled_inversion = true;
return func; }
else
{
/// Argument types could change slightly because of our transformations, e.g.
/// LowCardinality can be added because some subexpressions became constant
/// (in particular, sets). If that happens, re-run function overload resolver.
/// Otherwise don't re-run it because some functions may not be available
/// through FunctionFactory::get(), e.g. FunctionCapture.
bool types_changed = false;
for (size_t i = 0; i < children.size(); ++i)
{
if (!node.children[i]->result_type->equals(*children[i]->result_type))
{
types_changed = true;
break;
}
} }
if (types_changed)
{
auto function_builder = FunctionFactory::instance().get(name, context);
res = &inverted_dag.addFunction(function_builder, children, "");
}
else
{
res = &inverted_dag.addFunction(node.function_base, children, ""); res = &inverted_dag.addFunction(node.function_base, children, "");
chassert(res->result_type == node.result_type); }
}
}
} }
} }
if (need_inversion) if (!handled_inversion && need_inversion)
res = &inverted_dag.addFunction(FunctionFactory::instance().get("not", context), {res}, ""); res = &inverted_dag.addFunction(FunctionFactory::instance().get("not", context), {res}, "");
to_inverted[&node] = res; to_inverted[&node] = res;
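
The function above pushes a pending NOT down the expression tree instead of wrapping the result, applying De Morgan's laws and swapping comparison functions (the func_name lookup). A self-contained sketch of the same idea on a tiny expression tree; the real code additionally handles aliases, indexHint, materialize and argument type changes, as the hunk shows:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Expr
{
    std::string name;                           /// "and", "or", "not", "less", "greaterOrEquals", or a leaf
    std::vector<std::shared_ptr<Expr>> children;
};

using ExprPtr = std::shared_ptr<Expr>;

ExprPtr makeExpr(std::string name, std::vector<ExprPtr> children = {})
{
    return std::make_shared<Expr>(Expr{std::move(name), std::move(children)});
}

/// Clone `node` while pushing a pending negation down:
///   not(x)            -> recurse into x with the flag flipped
///   and/or under NOT  -> De Morgan: swap and<->or, negate children
///   less under NOT    -> greaterOrEquals (one entry of the swap table)
///   anything else     -> wrap in not(...) only if the flag is still set
ExprPtr pushDownNot(const ExprPtr & node, bool need_inversion)
{
    if (node->name == "not")
        return pushDownNot(node->children.at(0), !need_inversion);

    if (need_inversion && (node->name == "and" || node->name == "or"))
    {
        std::vector<ExprPtr> children;
        for (const auto & child : node->children)
            children.push_back(pushDownNot(child, true));
        return makeExpr(node->name == "and" ? "or" : "and", std::move(children));
    }

    if (need_inversion && node->name == "less")
        return makeExpr("greaterOrEquals", {pushDownNot(node->children.at(0), false),
                                            pushDownNot(node->children.at(1), false)});

    std::vector<ExprPtr> children;
    for (const auto & child : node->children)
        children.push_back(pushDownNot(child, false));
    auto res = makeExpr(node->name, std::move(children));
    return need_inversion ? makeExpr("not", {res}) : res;
}

void print(const ExprPtr & node)
{
    std::cout << node->name;
    if (node->children.empty())
        return;
    std::cout << '(';
    for (size_t i = 0; i < node->children.size(); ++i)
    {
        if (i) std::cout << ", ";
        print(node->children[i]);
    }
    std::cout << ')';
}

int main()
{
    /// not(and(less(a, b), c))  ->  or(greaterOrEquals(a, b), not(c))
    auto expr = makeExpr("not", {makeExpr("and", {makeExpr("less", {makeExpr("a"), makeExpr("b")}),
                                                  makeExpr("c")})});
    print(pushDownNot(expr, false));
    std::cout << '\n';
    return 0;
}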

View File

@ -6,6 +6,8 @@
#include <Core/Range.h> #include <Core/Range.h>
#include <Core/PlainRanges.h> #include <Core/PlainRanges.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <Parsers/ASTExpressionList.h> #include <Parsers/ASTExpressionList.h>
#include <Interpreters/Set.h> #include <Interpreters/Set.h>
@ -14,7 +16,6 @@
#include <Storages/SelectQueryInfo.h> #include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/RPNBuilder.h> #include <Storages/MergeTree/RPNBuilder.h>
#include "DataTypes/Serializations/ISerialization.h"
namespace DB namespace DB

View File

@ -1146,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr);
if (!filter_dag) if (!filter_dag)
return {}; return {};

View File

@ -255,6 +255,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex()
index_compressor_stream = std::make_unique<CompressedWriteBuffer>(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_compressor_stream = std::make_unique<CompressedWriteBuffer>(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size);
index_source_hashing_stream = std::make_unique<HashingWriteBuffer>(*index_compressor_stream); index_source_hashing_stream = std::make_unique<HashingWriteBuffer>(*index_compressor_stream);
} }
const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types;
index_serializations.reserve(primary_key_types.size());
for (const auto & type : primary_key_types)
index_serializations.push_back(type->getDefaultSerialization());
} }
} }
@ -300,22 +306,30 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
store = std::make_shared<GinIndexStore>(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); store = std::make_shared<GinIndexStore>(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment);
gin_index_stores[stream_name] = store; gin_index_stores[stream_name] = store;
} }
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings));
skip_index_accumulated_marks.push_back(0); skip_index_accumulated_marks.push_back(0);
} }
} }
void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row)
{
chassert(index_block.columns() == index_serializations.size());
auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream;
for (size_t i = 0; i < index_block.columns(); ++i)
{
const auto & column = index_block.getByPosition(i).column;
index_columns[i]->insertFrom(*column, row);
index_serializations[i]->serializeBinary(*column, row, index_stream, {});
}
}
void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write)
{ {
size_t primary_columns_num = primary_index_block.columns(); if (!metadata_snapshot->hasPrimaryKey())
if (index_columns.empty()) return;
{
index_types = primary_index_block.getDataTypes();
index_columns.resize(primary_columns_num);
last_block_index_columns.resize(primary_columns_num);
for (size_t i = 0; i < primary_columns_num; ++i)
index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty();
}
{ {
/** While filling index (index_columns), disable memory tracker. /** While filling index (index_columns), disable memory tracker.
@ -326,25 +340,20 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc
*/ */
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
if (index_columns.empty())
index_columns = primary_index_block.cloneEmptyColumns();
/// Write index. The index contains Primary Key value for each `index_granularity` row. /// Write index. The index contains Primary Key value for each `index_granularity` row.
for (const auto & granule : granules_to_write) for (const auto & granule : granules_to_write)
{ {
if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) if (granule.mark_on_start)
{ calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row);
for (size_t j = 0; j < primary_columns_num; ++j)
{
const auto & primary_column = primary_index_block.getByPosition(j);
index_columns[j]->insertFrom(*primary_column.column, granule.start_row);
primary_column.type->getDefaultSerialization()->serializeBinary(
*primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {});
}
}
} }
} }
/// store last index row to write final mark at the end of column /// Store block with last index row to write final mark at the end of column
for (size_t j = 0; j < primary_columns_num; ++j) if (with_final_mark)
last_block_index_columns[j] = primary_index_block.getByPosition(j).column; last_index_block = primary_index_block;
} }
void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block)
@ -421,19 +430,14 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat
if (index_file_hashing_stream) if (index_file_hashing_stream)
{ {
if (write_final_mark) if (write_final_mark && last_index_block)
{ {
for (size_t j = 0; j < index_columns.size(); ++j) MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
{ calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1);
const auto & column = *last_block_index_columns[j];
size_t last_row_number = column.size() - 1;
index_columns[j]->insertFrom(column, last_row_number);
index_types[j]->getDefaultSerialization()->serializeBinary(
column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {});
}
last_block_index_columns.clear();
} }
last_index_block.clear();
if (compress_primary_key) if (compress_primary_key)
{ {
index_source_hashing_stream->finalize(); index_source_hashing_stream->finalize();
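
For intuition on when calculateAndSerializePrimaryIndexRow is called: one row per granule start, plus the last row of the part when the final mark is enabled (that is what last_index_block is kept around for). A self-contained sketch of just that bookkeeping, with a fixed index_granularity for simplicity (the real writer uses adaptive granularity):

#include <cstddef>
#include <iostream>
#include <vector>

/// Which rows of a part end up in the primary index: the first row of every granule,
/// plus (optionally) the very last row as a "final mark".
std::vector<size_t> primaryIndexRows(size_t total_rows, size_t index_granularity, bool with_final_mark)
{
    std::vector<size_t> rows;
    for (size_t start = 0; start < total_rows; start += index_granularity)
        rows.push_back(start);                   /// granule.mark_on_start
    if (with_final_mark && total_rows > 0)
        rows.push_back(total_rows - 1);          /// written from last_index_block at finalize time
    return rows;
}

int main()
{
    for (size_t row : primaryIndexRows(20000, 8192, /*with_final_mark=*/ true))
        std::cout << row << '\n';                /// prints 0, 8192, 16384, 19999
    return 0;
}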

View File

@ -173,10 +173,10 @@ protected:
std::unique_ptr<HashingWriteBuffer> index_source_hashing_stream; std::unique_ptr<HashingWriteBuffer> index_source_hashing_stream;
bool compress_primary_key; bool compress_primary_key;
DataTypes index_types; /// Last block with index columns.
/// Index columns from the last block /// It's written to index file in the `writeSuffixAndFinalizePart` method.
/// It's written to index file in the `writeSuffixAndFinalizePart` method Block last_index_block;
Columns last_block_index_columns; Serializations index_serializations;
bool data_written = false; bool data_written = false;
@ -193,6 +193,7 @@ private:
void initStatistics(); void initStatistics();
virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0;
void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row);
struct ExecutionStatistics struct ExecutionStatistics
{ {

View File

@ -36,10 +36,12 @@
#include <base/sort.h> #include <base/sort.h>
#include <rocksdb/advanced_options.h> #include <rocksdb/advanced_options.h>
#include <rocksdb/compression_type.h>
#include <rocksdb/convenience.h>
#include <rocksdb/env.h> #include <rocksdb/env.h>
#include <rocksdb/options.h> #include <rocksdb/options.h>
#include <rocksdb/statistics.h>
#include <rocksdb/table.h> #include <rocksdb/table.h>
#include <rocksdb/convenience.h>
#include <rocksdb/utilities/db_ttl.h> #include <rocksdb/utilities/db_ttl.h>
#include <cstddef> #include <cstddef>
@ -428,6 +430,7 @@ void StorageEmbeddedRocksDB::initDB()
rocksdb::Options base; rocksdb::Options base;
base.create_if_missing = true; base.create_if_missing = true;
base.compression = rocksdb::CompressionType::kZSTD;
base.statistics = rocksdb::CreateDBStatistics(); base.statistics = rocksdb::CreateDBStatistics();
/// It is too verbose by default, and in fact we don't care about rocksdb logs at all. /// It is too verbose by default, and in fact we don't care about rocksdb logs at all.
base.info_log_level = rocksdb::ERROR_LEVEL; base.info_log_level = rocksdb::ERROR_LEVEL;
@ -439,7 +442,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has("rocksdb.options")) if (config.has("rocksdb.options"))
{ {
auto config_options = getOptionsFromConfig(config, "rocksdb.options"); auto config_options = getOptionsFromConfig(config, "rocksdb.options");
status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged); status = rocksdb::GetDBOptionsFromMap({}, merged, config_options, &merged);
if (!status.ok()) if (!status.ok())
{ {
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' at: {}: {}", throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' at: {}: {}",
@ -449,7 +452,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has("rocksdb.column_family_options")) if (config.has("rocksdb.column_family_options"))
{ {
auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options"); auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options");
status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged); status = rocksdb::GetColumnFamilyOptionsFromMap({}, merged, column_family_options, &merged);
if (!status.ok()) if (!status.ok())
{ {
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}: {}", throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}: {}",
@ -459,7 +462,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has("rocksdb.block_based_table_options")) if (config.has("rocksdb.block_based_table_options"))
{ {
auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options"); auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options");
status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options); status = rocksdb::GetBlockBasedTableOptionsFromMap({}, table_options, block_based_table_options, &table_options);
if (!status.ok()) if (!status.ok())
{ {
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}: {}", throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}: {}",
@ -484,7 +487,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has(config_key)) if (config.has(config_key))
{ {
auto table_config_options = getOptionsFromConfig(config, config_key); auto table_config_options = getOptionsFromConfig(config, config_key);
status = rocksdb::GetDBOptionsFromMap(merged, table_config_options, &merged); status = rocksdb::GetDBOptionsFromMap({}, merged, table_config_options, &merged);
if (!status.ok()) if (!status.ok())
{ {
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}", throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}",
@ -496,7 +499,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has(config_key)) if (config.has(config_key))
{ {
auto table_column_family_options = getOptionsFromConfig(config, config_key); auto table_column_family_options = getOptionsFromConfig(config, config_key);
status = rocksdb::GetColumnFamilyOptionsFromMap(merged, table_column_family_options, &merged); status = rocksdb::GetColumnFamilyOptionsFromMap({}, merged, table_column_family_options, &merged);
if (!status.ok()) if (!status.ok())
{ {
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}", throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}",
@ -508,7 +511,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has(config_key)) if (config.has(config_key))
{ {
auto block_based_table_options = getOptionsFromConfig(config, config_key); auto block_based_table_options = getOptionsFromConfig(config, config_key);
status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options); status = rocksdb::GetBlockBasedTableOptionsFromMap({}, table_options, block_based_table_options, &table_options);
if (!status.ok()) if (!status.ok())
{ {
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}", throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}",
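
The `{}` added as the first argument corresponds to the extra ConfigOptions parameter that newer RocksDB releases expect in GetDBOptionsFromMap and its siblings. A minimal standalone sketch of the same call, assuming a RocksDB version with the ConfigOptions-based overloads; the option name in the map is only an example:

#include <rocksdb/convenience.h>
#include <rocksdb/options.h>
#include <rocksdb/status.h>
#include <iostream>
#include <string>
#include <unordered_map>

int main()
{
    rocksdb::DBOptions base;
    base.create_if_missing = true;

    /// Newer RocksDB takes a ConfigOptions as the first parameter;
    /// the hunk above simply passes a default-constructed one ({}).
    rocksdb::ConfigOptions config_options;
    std::unordered_map<std::string, std::string> overrides{{"max_background_jobs", "4"}};

    rocksdb::DBOptions merged;
    rocksdb::Status status = rocksdb::GetDBOptionsFromMap(config_options, base, overrides, &merged);
    if (!status.ok())
        std::cerr << "failed to merge options: " << status.ToString() << std::endl;
    return status.ok() ? 0 : 1;
}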

View File

@ -41,6 +41,14 @@ ColumnsDescription StorageSystemRocksDB::getColumnsDescription()
} }
Block StorageSystemRocksDB::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{ {
const auto access = context->getAccess(); const auto access = context->getAccess();

View File

@ -22,6 +22,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock; using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
}; };
} }

View File

@ -275,7 +275,7 @@ public:
private: private:
std::shared_ptr<StorageMergeTreeIndex> storage; std::shared_ptr<StorageMergeTreeIndex> storage;
Poco::Logger * log; Poco::Logger * log;
const ActionsDAG::Node * predicate = nullptr; ExpressionActionsPtr virtual_columns_filter;
}; };
void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes)
@ -283,7 +283,16 @@ void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes)
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag) if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0); {
Block block_to_filter
{
{ {}, std::make_shared<DataTypeString>(), StorageMergeTreeIndex::part_name_column.name },
};
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
if (dag)
virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
}
} }
void StorageMergeTreeIndex::read( void StorageMergeTreeIndex::read(
@ -335,7 +344,7 @@ void StorageMergeTreeIndex::read(
void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{ {
auto filtered_parts = storage->getFilteredDataParts(predicate, context); auto filtered_parts = storage->getFilteredDataParts(virtual_columns_filter);
LOG_DEBUG(log, "Reading index{}from {} parts of table {}", LOG_DEBUG(log, "Reading index{}from {} parts of table {}",
storage->with_marks ? " with marks " : " ", storage->with_marks ? " with marks " : " ",
@ -345,9 +354,9 @@ void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline,
pipeline.init(Pipe(std::make_shared<MergeTreeIndexSource>(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks))); pipeline.init(Pipe(std::make_shared<MergeTreeIndexSource>(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks)));
} }
MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ExpressionActionsPtr & virtual_columns_filter) const
{ {
if (!predicate) if (!virtual_columns_filter)
return data_parts; return data_parts;
auto all_part_names = ColumnString::create(); auto all_part_names = ColumnString::create();
@ -355,7 +364,7 @@ MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const
all_part_names->insert(part->name); all_part_names->insert(part->name);
Block filtered_block{{std::move(all_part_names), std::make_shared<DataTypeString>(), part_name_column.name}}; Block filtered_block{{std::move(all_part_names), std::make_shared<DataTypeString>(), part_name_column.name}};
VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); VirtualColumnUtils::filterBlockWithExpression(virtual_columns_filter, filtered_block);
if (!filtered_block.rows()) if (!filtered_block.rows())
return {}; return {};

View File

@ -36,7 +36,7 @@ public:
private: private:
friend class ReadFromMergeTreeIndex; friend class ReadFromMergeTreeIndex;
MergeTreeData::DataPartsVector getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const; MergeTreeData::DataPartsVector getFilteredDataParts(const ExpressionActionsPtr & virtual_columns_filter) const;
StoragePtr source_table; StoragePtr source_table;
bool with_marks; bool with_marks;

View File

@ -5,6 +5,7 @@
// #include <Storages/IStorage.h> // #include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h> #include <Storages/SelectQueryInfo.h>
#include <Storages/System/getQueriedColumnsMaskAndHeader.h> #include <Storages/System/getQueriedColumnsMaskAndHeader.h>
#include <Storages/VirtualColumnUtils.h>
#include <Processors/Sources/SourceFromSingleChunk.h> #include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/QueryPlan/QueryPlan.h> #include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/SourceStepWithFilter.h> #include <Processors/QueryPlan/SourceStepWithFilter.h>
@ -44,7 +45,7 @@ public:
private: private:
std::shared_ptr<IStorageSystemOneBlock> storage; std::shared_ptr<IStorageSystemOneBlock> storage;
std::vector<UInt8> columns_mask; std::vector<UInt8> columns_mask;
const ActionsDAG::Node * predicate = nullptr; std::optional<ActionsDAG> filter;
}; };
void IStorageSystemOneBlock::read( void IStorageSystemOneBlock::read(
@ -79,8 +80,9 @@ void IStorageSystemOneBlock::read(
void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{ {
const auto & sample_block = getOutputStream().header; const Block & sample_block = getOutputStream().header;
MutableColumns res_columns = sample_block.cloneEmptyColumns(); MutableColumns res_columns = sample_block.cloneEmptyColumns();
const ActionsDAG::Node * predicate = filter ? filter->getOutputs().at(0) : nullptr;
storage->fillData(res_columns, context, predicate, std::move(columns_mask)); storage->fillData(res_columns, context, predicate, std::move(columns_mask));
UInt64 num_rows = res_columns.at(0)->size(); UInt64 num_rows = res_columns.at(0)->size();
@ -93,8 +95,18 @@ void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes)
{ {
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag) if (!filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0); return;
Block sample = storage->getFilterSampleBlock();
if (sample.columns() == 0)
return;
filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &sample);
/// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery.
if (filter)
VirtualColumnUtils::buildSetsForDAG(*filter, context);
} }
} }

View File

@ -22,8 +22,16 @@ class Context;
class IStorageSystemOneBlock : public IStorage class IStorageSystemOneBlock : public IStorage
{ {
protected: protected:
/// If this method uses `predicate`, getFilterSampleBlock() must list all columns to which
/// it's applied. (Otherwise there'll be a LOGICAL_ERROR "Not-ready Set is passed" on subqueries.)
virtual void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const = 0; virtual void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const = 0;
/// Columns to which fillData() applies the `predicate`.
virtual Block getFilterSampleBlock() const
{
return {};
}
virtual bool supportsColumnsMask() const { return false; } virtual bool supportsColumnsMask() const { return false; }
friend class ReadFromSystemOneBlock; friend class ReadFromSystemOneBlock;
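
Concretely, a system table that pre-filters by database/table would override both methods along these lines; the sketch follows the StorageSystemRocksDB and StorageSystemMutations changes earlier in this diff, while the table name and the candidate-building helper are illustrative:

/// Declare which columns fillData() will combine with the predicate.
Block StorageSystemSomething::getFilterSampleBlock() const
{
    return {
        { {}, std::make_shared<DataTypeString>(), "database" },
        { {}, std::make_shared<DataTypeString>(), "table" },
    };
}

void StorageSystemSomething::fillData(
    MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{
    /// Build a block with candidate (database, table) pairs and let the predicate
    /// narrow it down before doing any expensive per-table work.
    Block candidates = buildCandidateDatabaseTableBlock(context);   /// hypothetical helper
    VirtualColumnUtils::filterBlockWithPredicate(predicate, candidates, context);
    /// ... fill res_columns only for the rows that survived ...
}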

View File

@ -338,7 +338,7 @@ private:
std::shared_ptr<StorageSystemColumns> storage; std::shared_ptr<StorageSystemColumns> storage;
std::vector<UInt8> columns_mask; std::vector<UInt8> columns_mask;
const size_t max_block_size; const size_t max_block_size;
const ActionsDAG::Node * predicate = nullptr; std::optional<ActionsDAG> virtual_columns_filter;
}; };
void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes)
@ -346,7 +346,17 @@ void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes)
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag) if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0); {
Block block_to_filter;
block_to_filter.insert(ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "database"));
block_to_filter.insert(ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "table"));
virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
/// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery.
if (virtual_columns_filter)
VirtualColumnUtils::buildSetsForDAG(*virtual_columns_filter, context);
}
} }
void StorageSystemColumns::read( void StorageSystemColumns::read(
@ -408,7 +418,8 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline,
block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared<DataTypeString>(), "database")); block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared<DataTypeString>(), "database"));
/// Filter block with `database` column. /// Filter block with `database` column.
VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithPredicate(virtual_columns_filter->getOutputs().at(0), block_to_filter, context);
if (!block_to_filter.rows()) if (!block_to_filter.rows())
{ {
@ -456,7 +467,8 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline,
} }
/// Filter block with `database` and `table` columns. /// Filter block with `database` and `table` columns.
VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithPredicate(virtual_columns_filter->getOutputs().at(0), block_to_filter, context);
if (!block_to_filter.rows()) if (!block_to_filter.rows())
{ {

View File

@ -214,7 +214,7 @@ private:
std::shared_ptr<StorageSystemDataSkippingIndices> storage; std::shared_ptr<StorageSystemDataSkippingIndices> storage;
std::vector<UInt8> columns_mask; std::vector<UInt8> columns_mask;
const size_t max_block_size; const size_t max_block_size;
const ActionsDAG::Node * predicate = nullptr; ExpressionActionsPtr virtual_columns_filter;
}; };
void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes)
@ -222,7 +222,16 @@ void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag) if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0); {
Block block_to_filter
{
{ ColumnString::create(), std::make_shared<DataTypeString>(), "database" },
};
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
if (dag)
virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
}
} }
void StorageSystemDataSkippingIndices::read( void StorageSystemDataSkippingIndices::read(
@ -268,7 +277,8 @@ void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder
/// Condition on "database" in a query acts like an index. /// Condition on "database" in a query acts like an index.
Block block { ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "database") }; Block block { ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "database") };
VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithExpression(virtual_columns_filter, block);
ColumnPtr & filtered_databases = block.getByPosition(0).column; ColumnPtr & filtered_databases = block.getByPosition(0).column;
pipeline.init(Pipe(std::make_shared<DataSkippingIndicesSource>( pipeline.init(Pipe(std::make_shared<DataSkippingIndicesSource>(

View File

@ -73,6 +73,14 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database
return engine_full; return engine_full;
} }
Block StorageSystemDatabases::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "engine" },
{ {}, std::make_shared<DataTypeUUID>(), "uuid" },
};
}
static ColumnPtr getFilteredDatabases(const Databases & databases, const ActionsDAG::Node * predicate, ContextPtr context) static ColumnPtr getFilteredDatabases(const Databases & databases, const ActionsDAG::Node * predicate, ContextPtr context)
{ {
MutableColumnPtr name_column = ColumnString::create(); MutableColumnPtr name_column = ColumnString::create();

View File

@ -27,6 +27,7 @@ protected:
bool supportsColumnsMask() const override { return true; } bool supportsColumnsMask() const override { return true; }
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const override; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const override;
Block getFilterSampleBlock() const override;
}; };
} }

View File

@ -107,6 +107,13 @@ ColumnsDescription StorageSystemDistributionQueue::getColumnsDescription()
}; };
} }
Block StorageSystemDistributionQueue::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{ {

View File

@ -22,6 +22,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock; using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
}; };
} }

View File

@ -46,6 +46,13 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription()
}; };
} }
Block StorageSystemMutations::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{ {

View File

@ -22,6 +22,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock; using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
}; };
} }

View File

@ -43,6 +43,14 @@ ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription()
} }
Block StorageSystemPartMovesBetweenShards::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{ {
const auto access = context->getAccess(); const auto access = context->getAccess();

View File

@ -20,6 +20,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock; using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
}; };
} }

View File

@ -285,7 +285,7 @@ private:
const bool with_zk_fields;
const size_t max_block_size;
std::shared_ptr<StorageSystemReplicasImpl> impl;
const ActionsDAG::Node * predicate = nullptr;
ExpressionActionsPtr virtual_columns_filter;
};
void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes)
@ -293,7 +293,18 @@ void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes)
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0);
{
Block block_to_filter
{
{ ColumnString::create(), std::make_shared<DataTypeString>(), "database" },
{ ColumnString::create(), std::make_shared<DataTypeString>(), "table" },
{ ColumnString::create(), std::make_shared<DataTypeString>(), "engine" },
};
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
if (dag)
virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
}
}
void StorageSystemReplicas::read(
@ -430,7 +441,8 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline,
{ col_engine, std::make_shared<DataTypeString>(), "engine" },
};
VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context);
if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithExpression(virtual_columns_filter, filtered_block);
if (!filtered_block.rows())
{

View File

@ -62,6 +62,14 @@ ColumnsDescription StorageSystemReplicationQueue::getColumnsDescription()
}
Block StorageSystemReplicationQueue::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{
const auto access = context->getAccess();

View File

@ -21,6 +21,7 @@ public:
protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -275,8 +275,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node)
static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
const ActionsDAG::Node * node,
const Block * allowed_inputs,
ActionsDAG::Nodes & additional_nodes,
bool allow_non_deterministic_functions)
ActionsDAG::Nodes & additional_nodes)
{
if (node->type == ActionsDAG::ActionType::FUNCTION)
{
@ -285,14 +284,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto & node_copy = additional_nodes.emplace_back(*node);
node_copy.children.clear();
for (const auto * child : node->children)
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions))
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes))
node_copy.children.push_back(child_copy);
/// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for
/// trivial count optimization, otherwise we can get incorrect results. For example, if the query is
/// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply
/// trivial count.
else if (!allow_non_deterministic_functions)
return nullptr;
if (node_copy.children.empty())
return nullptr;
@ -318,7 +311,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
{
auto & node_copy = additional_nodes.emplace_back(*node);
for (auto & child : node_copy.children)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child)
return nullptr;
return &node_copy;
@ -332,7 +325,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto index_hint_dag = index_hint->getActions().clone();
ActionsDAG::NodeRawConstPtrs atoms;
for (const auto & output : index_hint_dag.getOutputs())
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions))
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
atoms.push_back(child_copy);
if (!atoms.empty())
@ -366,13 +359,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
return node;
}
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions)
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
{
if (!predicate)
return {};
ActionsDAG::Nodes additional_nodes;
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions);
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes);
if (!res)
return {};
@ -381,7 +374,7 @@ std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
{
auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false);
auto dag = splitFilterDagForAllowedInputs(predicate, &block);
if (dag)
filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block);
}

View File

@ -18,8 +18,16 @@ class NamesAndTypesList;
namespace VirtualColumnUtils
{
/// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate.
/// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs).
/// The filtering functions are tricky to use correctly.
/// There are 2 ways:
/// 1. Call filterBlockWithPredicate() or filterBlockWithExpression() inside SourceStepWithFilter::applyFilters().
/// 2. Call splitFilterDagForAllowedInputs() and buildSetsForDAG() inside SourceStepWithFilter::applyFilters().
/// Then call filterBlockWithPredicate() or filterBlockWithExpression() in initializePipeline().
///
/// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed"
/// if there are subqueries.
/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context);
/// Just filters block. Block should contain all the required columns.
@ -33,15 +41,7 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node);
/// Extract a part of predicate that can be evaluated using only columns from input_names.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic
/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr.
/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in
/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is
/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1`
/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is
/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial
/// count optimization will be mistakenly applied to the query.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true);
/// Extract from the input stream a set of `name` column values
template <typename T>

View File

@ -8,7 +8,6 @@ from pathlib import Path
from typing import List, Sequence, Tuple from typing import List, Sequence, Tuple
from ci_config import CI from ci_config import CI
from ci_utils import normalize_string
from env_helper import TEMP_PATH from env_helper import TEMP_PATH
from functional_test_check import NO_CHANGES_MSG from functional_test_check import NO_CHANGES_MSG
from report import ( from report import (
@ -142,7 +141,9 @@ def main():
for file in set(jr.additional_files): for file in set(jr.additional_files):
file_ = Path(file) file_ = Path(file)
file_name = file_.name file_name = file_.name
file_name = file_name.replace(".", "__" + normalize_string(job_id) + ".", 1)
file_name = file_name.replace(
".", "__" + CI.Utils.normalize_string(job_id) + ".", 1
)
file_ = file_.rename(file_.parent / file_name) file_ = file_.rename(file_.parent / file_name)
additional_files.append(file_) additional_files.append(file_)

View File

@ -16,7 +16,7 @@ import upload_result_helper
from build_check import get_release_or_pr from build_check import get_release_or_pr
from ci_config import CI from ci_config import CI
from ci_metadata import CiMetadata from ci_metadata import CiMetadata
from ci_utils import GH, normalize_string, Utils
from ci_utils import GH, Utils
from clickhouse_helper import ( from clickhouse_helper import (
CiLogsCredentials, CiLogsCredentials,
ClickHouseHelper, ClickHouseHelper,
@ -296,7 +296,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
# do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while # do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while
# testing), otherwise reports won't be found # testing), otherwise reports won't be found
if not (pr_info.is_scheduled or pr_info.is_dispatched): if not (pr_info.is_scheduled or pr_info.is_dispatched):
report_prefix = normalize_string(pr_info.head_ref)
report_prefix = Utils.normalize_string(pr_info.head_ref)
print( print(
f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]" f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]"
) )
@ -718,7 +718,7 @@ def _upload_build_artifacts(
( (
get_release_or_pr(pr_info, get_version_from_repo())[1], get_release_or_pr(pr_info, get_version_from_repo())[1],
pr_info.sha, pr_info.sha,
normalize_string(build_name),
Utils.normalize_string(build_name),
"performance.tar.zst", "performance.tar.zst",
) )
) )
@ -1250,7 +1250,7 @@ def main() -> int:
( (
get_release_or_pr(pr_info, get_version_from_repo())[0], get_release_or_pr(pr_info, get_version_from_repo())[0],
pr_info.sha, pr_info.sha,
normalize_string(
Utils.normalize_string(
job_report.check_name or _get_ext_check_name(args.job_name) job_report.check_name or _get_ext_check_name(args.job_name)
), ),
) )

View File

@ -7,7 +7,7 @@ from typing import Dict, Optional, Any, Union, Sequence, List, Set
from ci_config import CI from ci_config import CI
from ci_utils import is_hex, GH
from ci_utils import Utils, GH
from commit_status_helper import CommitStatusData from commit_status_helper import CommitStatusData
from env_helper import ( from env_helper import (
TEMP_PATH, TEMP_PATH,
@ -240,7 +240,7 @@ class CiCache:
int(job_properties[-1]), int(job_properties[-1]),
) )
if not is_hex(job_digest):
if not Utils.is_hex(job_digest):
print("ERROR: wrong record job digest") print("ERROR: wrong record job digest")
return None return None

View File

@ -3,7 +3,7 @@ import re
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from typing import Dict, Optional, List from typing import Dict, Optional, List
from ci_utils import normalize_string
from ci_utils import Utils
from ci_definitions import * from ci_definitions import *
@ -13,7 +13,6 @@ class CI:
each config item in the below dicts should be an instance of JobConfig class or inherited from it each config item in the below dicts should be an instance of JobConfig class or inherited from it
""" """
MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5
MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2 MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2
# reimport types to CI class so that they visible as CI.* and mypy is happy # reimport types to CI class so that they visible as CI.* and mypy is happy
@ -21,12 +20,10 @@ class CI:
from ci_definitions import BuildConfig as BuildConfig from ci_definitions import BuildConfig as BuildConfig
from ci_definitions import DigestConfig as DigestConfig from ci_definitions import DigestConfig as DigestConfig
from ci_definitions import JobConfig as JobConfig from ci_definitions import JobConfig as JobConfig
from ci_definitions import CheckDescription as CheckDescription
from ci_definitions import Tags as Tags from ci_definitions import Tags as Tags
from ci_definitions import JobNames as JobNames from ci_definitions import JobNames as JobNames
from ci_definitions import BuildNames as BuildNames from ci_definitions import BuildNames as BuildNames
from ci_definitions import StatusNames as StatusNames from ci_definitions import StatusNames as StatusNames
from ci_definitions import CHECK_DESCRIPTIONS as CHECK_DESCRIPTIONS
from ci_definitions import REQUIRED_CHECKS as REQUIRED_CHECKS from ci_definitions import REQUIRED_CHECKS as REQUIRED_CHECKS
from ci_definitions import SyncState as SyncState from ci_definitions import SyncState as SyncState
from ci_definitions import MQ_JOBS as MQ_JOBS from ci_definitions import MQ_JOBS as MQ_JOBS
@ -37,9 +34,7 @@ class CI:
from ci_utils import GH as GH from ci_utils import GH as GH
from ci_utils import Shell as Shell from ci_utils import Shell as Shell
from ci_definitions import Labels as Labels from ci_definitions import Labels as Labels
from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
from ci_definitions import WorkFlowNames as WorkFlowNames from ci_definitions import WorkFlowNames as WorkFlowNames
from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL
# Jobs that run for doc related updates # Jobs that run for doc related updates
_DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK]
@ -558,7 +553,7 @@ class CI:
@classmethod @classmethod
def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]: def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]:
for label, config in cls.TAG_CONFIGS.items(): for label, config in cls.TAG_CONFIGS.items():
if normalize_string(label_name) == normalize_string(label):
if Utils.normalize_string(label_name) == Utils.normalize_string(label):
return config return config
return None return None
@ -687,6 +682,34 @@ class CI:
assert res, f"not a build [{build_name}] or invalid JobConfig" assert res, f"not a build [{build_name}] or invalid JobConfig"
return res return res
@classmethod
def is_workflow_ok(cls) -> bool:
# TODO: temporary method to make Mergeable check working
res = cls.GH.get_workflow_results()
if not res:
print("ERROR: no workflow results found")
return False
for workflow_job, workflow_data in res.items():
status = workflow_data["result"]
if status in (
cls.GH.ActionStatuses.SUCCESS,
cls.GH.ActionStatuses.SKIPPED,
):
print(f"Workflow status for [{workflow_job}] is [{status}] - continue")
elif status in (cls.GH.ActionStatuses.FAILURE,):
if workflow_job in (
WorkflowStages.TESTS_2,
WorkflowStages.TESTS_2_WW,
):
print(
f"Failed Workflow status for [{workflow_job}], it's not required - continue"
)
continue
print(f"Failed Workflow status for [{workflow_job}]")
return False
return True
if __name__ == "__main__": if __name__ == "__main__":
parser = ArgumentParser( parser = ArgumentParser(

View File

@ -1,7 +1,7 @@
import copy import copy
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Callable, List, Union, Iterable, Optional, Literal, Any
from typing import List, Union, Iterable, Optional, Literal, Any
from ci_utils import WithIter from ci_utils import WithIter
from integration_test_images import IMAGES from integration_test_images import IMAGES
@ -32,28 +32,6 @@ class Labels:
AUTO_BACKPORT = {"pr-critical-bugfix"} AUTO_BACKPORT = {"pr-critical-bugfix"}
TRUSTED_CONTRIBUTORS = {
e.lower()
for e in [
"amosbird",
"azat", # SEMRush
"bharatnc", # Many contributions.
"cwurm", # ClickHouse, Inc
"den-crane", # Documentation contributor
"ildus", # adjust, ex-pgpro
"nvartolomei", # Seasoned contributor, CloudFlare
"taiyang-li",
"ucasFL", # Amos Bird's friend
"thomoco", # ClickHouse, Inc
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse, Inc
"tsolodov", # ClickHouse, Inc
"justindeguzman", # ClickHouse, Inc
"XuJia0210", # ClickHouse, Inc
]
}
class WorkflowStages(metaclass=WithIter): class WorkflowStages(metaclass=WithIter):
""" """
Stages of GitHUb actions workflow Stages of GitHUb actions workflow
@ -668,188 +646,3 @@ MQ_JOBS = [
BuildNames.BINARY_RELEASE, BuildNames.BINARY_RELEASE,
JobNames.UNIT_TEST, JobNames.UNIT_TEST,
] ]
@dataclass
class CheckDescription:
name: str
description: str # the check descriptions, will be put into the status table
match_func: Callable[[str], bool] # the function to check vs the commit status
def __hash__(self) -> int:
return hash(self.name + self.description)
CHECK_DESCRIPTIONS = [
CheckDescription(
StatusNames.PR_CHECK,
"Checks correctness of the PR's body",
lambda x: x == "PR Check",
),
CheckDescription(
StatusNames.SYNC,
"If it fails, ask a maintainer for help",
lambda x: x == StatusNames.SYNC,
),
CheckDescription(
"AST fuzzer",
"Runs randomly generated queries to catch program errors. "
"The build type is optionally given in parenthesis. "
"If it fails, ask a maintainer for help",
lambda x: x.startswith("AST fuzzer"),
),
CheckDescription(
JobNames.BUGFIX_VALIDATE,
"Checks that either a new test (functional or integration) or there "
"some changed tests that fail with the binary built on master branch",
lambda x: x == JobNames.BUGFIX_VALIDATE,
),
CheckDescription(
StatusNames.CI,
"A meta-check that indicates the running CI. Normally, it's in <b>success</b> or "
"<b>pending</b> state. The failed status indicates some problems with the PR",
lambda x: x == "CI running",
),
CheckDescription(
"Builds",
"Builds ClickHouse in various configurations for use in further steps. "
"You have to fix the builds that fail. Build logs often has enough "
"information to fix the error, but you might have to reproduce the failure "
"locally. The <b>cmake</b> options can be found in the build log, grepping for "
'<b>cmake</b>. Use these options and follow the <a href="'
'https://clickhouse.com/docs/en/development/build">general build process</a>',
lambda x: x.startswith("ClickHouse") and x.endswith("build check"),
),
CheckDescription(
"Compatibility check",
"Checks that <b>clickhouse</b> binary runs on distributions with old libc "
"versions. If it fails, ask a maintainer for help",
lambda x: x.startswith("Compatibility check"),
),
CheckDescription(
JobNames.DOCKER_SERVER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker server"),
),
CheckDescription(
JobNames.DOCKER_KEEPER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker keeper"),
),
CheckDescription(
JobNames.DOCS_CHECK,
"Builds and tests the documentation",
lambda x: x == JobNames.DOCS_CHECK,
),
CheckDescription(
JobNames.FAST_TEST,
"Normally this is the first check that is ran for a PR. It builds ClickHouse "
'and runs most of <a href="https://clickhouse.com/docs/en/development/tests'
'#functional-tests">stateless functional tests</a>, '
"omitting some. If it fails, further checks are not started until it is fixed. "
"Look at the report to see which tests fail, then reproduce the failure "
'locally as described <a href="https://clickhouse.com/docs/en/development/'
'tests#functional-test-locally">here</a>',
lambda x: x == JobNames.FAST_TEST,
),
CheckDescription(
"Flaky tests",
"Checks if new added or modified tests are flaky by running them repeatedly, "
"in parallel, with more randomization. Functional tests are run 100 times "
"with address sanitizer, and additional randomization of thread scheduling. "
"Integration tests are run up to 10 times. If at least once a new test has "
"failed, or was too long, this check will be red. We don't allow flaky tests, "
'read <a href="https://clickhouse.com/blog/decorating-a-christmas-tree-with-'
'the-help-of-flaky-tests/">the doc</a>',
lambda x: "tests flaky check" in x,
),
CheckDescription(
"Install packages",
"Checks that the built packages are installable in a clear environment",
lambda x: x.startswith("Install packages ("),
),
CheckDescription(
"Integration tests",
"The integration tests report. In parenthesis the package type is given, "
"and in square brackets are the optional part/total tests",
lambda x: x.startswith("Integration tests ("),
),
CheckDescription(
StatusNames.MERGEABLE,
"Checks if all other necessary checks are successful",
lambda x: x == StatusNames.MERGEABLE,
),
CheckDescription(
"Performance Comparison",
"Measure changes in query performance. The performance test report is "
'described in detail <a href="https://github.com/ClickHouse/ClickHouse/tree'
'/master/docker/test/performance-comparison#how-to-read-the-report">here</a>. '
"In square brackets are the optional part/total tests",
lambda x: x.startswith("Performance Comparison"),
),
CheckDescription(
"Push to Dockerhub",
"The check for building and pushing the CI related docker images to docker hub",
lambda x: x.startswith("Push") and "to Dockerhub" in x,
),
CheckDescription(
"Sqllogic",
"Run clickhouse on the "
'<a href="https://www.sqlite.org/sqllogictest">sqllogic</a> '
"test set against sqlite and checks that all statements are passed",
lambda x: x.startswith("Sqllogic test"),
),
CheckDescription(
"SQLancer",
"Fuzzing tests that detect logical bugs with "
'<a href="https://github.com/sqlancer/sqlancer">SQLancer</a> tool',
lambda x: x.startswith("SQLancer"),
),
CheckDescription(
"Stateful tests",
"Runs stateful functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateful tests ("),
),
CheckDescription(
"Stateless tests",
"Runs stateless functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateless tests ("),
),
CheckDescription(
"Stress test",
"Runs stateless functional tests concurrently from several clients to detect "
"concurrency-related errors",
lambda x: x.startswith("Stress test ("),
),
CheckDescription(
JobNames.STYLE_CHECK,
"Runs a set of checks to keep the code style clean. If some of tests failed, "
"see the related log from the report",
lambda x: x == JobNames.STYLE_CHECK,
),
CheckDescription(
"Unit tests",
"Runs the unit tests for different release types",
lambda x: x.startswith("Unit tests ("),
),
CheckDescription(
"Upgrade check",
"Runs stress tests on server version from last release and then tries to "
"upgrade it to the version from the PR. It checks if the new server can "
"successfully startup without any errors, crashes or sanitizer asserts",
lambda x: x.startswith("Upgrade check ("),
),
CheckDescription(
"ClickBench",
"Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table",
lambda x: x.startswith("ClickBench"),
),
CheckDescription(
"Fallback for unknown",
"There's no description for the check yet, please add it to "
"tests/ci/ci_config.py:CHECK_DESCRIPTIONS",
lambda x: True,
),
]

View File

@ -2,7 +2,6 @@ import re
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
from typing import Optional, List, Dict, Any, Iterable from typing import Optional, List, Dict, Any, Iterable
from ci_utils import normalize_string
from ci_config import CI from ci_config import CI
from git_helper import Runner as GitRunner, GIT_PREFIX from git_helper import Runner as GitRunner, GIT_PREFIX
from pr_info import PRInfo from pr_info import PRInfo
@ -89,14 +88,14 @@ class CiSettings:
if not res.include_keywords: if not res.include_keywords:
res.include_keywords = [] res.include_keywords = []
res.include_keywords.append( res.include_keywords.append(
normalize_string(match.removeprefix("ci_include_"))
CI.Utils.normalize_string(match.removeprefix("ci_include_"))
) )
elif match.startswith("ci_exclude_"): elif match.startswith("ci_exclude_"):
if not res.exclude_keywords: if not res.exclude_keywords:
res.exclude_keywords = [] res.exclude_keywords = []
keywords = match.removeprefix("ci_exclude_").split("|") keywords = match.removeprefix("ci_exclude_").split("|")
res.exclude_keywords += [ res.exclude_keywords += [
normalize_string(keyword) for keyword in keywords
CI.Utils.normalize_string(keyword) for keyword in keywords
] ]
elif match == CI.Tags.NO_CI_CACHE: elif match == CI.Tags.NO_CI_CACHE:
res.no_ci_cache = True res.no_ci_cache = True
@ -163,7 +162,7 @@ class CiSettings:
# do not exclude builds # do not exclude builds
if self.exclude_keywords and not CI.is_build_job(job): if self.exclude_keywords and not CI.is_build_job(job):
for keyword in self.exclude_keywords: for keyword in self.exclude_keywords:
if keyword in normalize_string(job):
if keyword in CI.Utils.normalize_string(job):
print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny") print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny")
return False return False
@ -174,7 +173,7 @@ class CiSettings:
# never exclude Style Check by include keywords # never exclude Style Check by include keywords
return True return True
for keyword in self.include_keywords: for keyword in self.include_keywords:
if keyword in normalize_string(job):
if keyword in CI.Utils.normalize_string(job):
print(f"Job [{job}] matches Include keyword [{keyword}] - pass") print(f"Job [{job}] matches Include keyword [{keyword}] - pass")
return True return True
to_deny = True to_deny = True

View File

@ -6,7 +6,7 @@ import sys
import time import time
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
from typing import Any, Iterator, List, Union, Optional, Sequence
import requests import requests
@ -20,41 +20,6 @@ class Envs:
GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "") GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")
LABEL_CATEGORIES = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
"pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
}
CATEGORY_TO_LABEL = {
c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
}
class WithIter(type): class WithIter(type):
def __iter__(cls): def __iter__(cls):
return (v for k, v in cls.__dict__.items() if not k.startswith("_")) return (v for k, v in cls.__dict__.items() if not k.startswith("_"))
@ -70,21 +35,6 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
os.chdir(oldpwd) os.chdir(oldpwd)
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
def normalize_string(string: str) -> str:
res = string.lower()
for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_"), ("-", "_")):
res = res.replace(*r)
return res
class GH: class GH:
class ActionsNames: class ActionsNames:
RunConfig = "RunConfig" RunConfig = "RunConfig"
@ -94,9 +44,10 @@ class GH:
FAILURE = "failure" FAILURE = "failure"
PENDING = "pending" PENDING = "pending"
SUCCESS = "success" SUCCESS = "success"
SKIPPED = "skipped"
@classmethod @classmethod
def _get_workflow_results(cls):
def get_workflow_results(cls):
if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): if not Path(Envs.WORKFLOW_RESULT_FILE).exists():
print( print(
f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]"
@ -115,13 +66,13 @@ class GH:
@classmethod @classmethod
def print_workflow_results(cls): def print_workflow_results(cls):
res = cls._get_workflow_results()
res = cls.get_workflow_results()
results = [f"{job}: {data['result']}" for job, data in res.items()] results = [f"{job}: {data['result']}" for job, data in res.items()]
cls.print_in_group("Workflow results", results) cls.print_in_group("Workflow results", results)
@classmethod @classmethod
def is_workflow_ok(cls) -> bool: def is_workflow_ok(cls) -> bool:
res = cls._get_workflow_results()
res = cls.get_workflow_results()
for _job, data in res.items(): for _job, data in res.items():
if data["result"] == "failure": if data["result"] == "failure":
return False return False
@ -129,7 +80,7 @@ class GH:
@classmethod @classmethod
def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]: def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]:
res = cls._get_workflow_results()
res = cls.get_workflow_results()
if wf_job_name in res: if wf_job_name in res:
return res[wf_job_name]["result"] # type: ignore return res[wf_job_name]["result"] # type: ignore
else: else:
@ -149,8 +100,8 @@ class GH:
) -> str: ) -> str:
assert len(token) == 40 assert len(token) == 40
assert len(commit_sha) == 40 assert len(commit_sha) == 40
assert is_hex(commit_sha)
assert Utils.is_hex(commit_sha)
assert not is_hex(token)
assert not Utils.is_hex(token)
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}"
headers = { headers = {
"Authorization": f"token {token}", "Authorization": f"token {token}",
@ -298,79 +249,23 @@ class Utils:
Shell.check("sudo dmesg --clear", verbose=True)
@staticmethod
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
@staticmethod
def normalize_string(string: str) -> str:
res = string.lower()
for r in (
(" ", "_"),
("(", "_"),
(")", "_"),
(",", "_"),
("/", "_"),
("-", "_"),
):
res = res.replace(*r)
return res
def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [
True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)
]:
return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif "(changelog entry is not required)" in category:
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
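
As an aside, a minimal sketch of how the two helpers that moved onto Utils behave (the concrete inputs are made up; the import path follows the diff above):

from ci_utils import Utils

# normalize_string() lowercases and turns separators into underscores,
# which is how job names become report/path fragments.
print(Utils.normalize_string("Stateless tests (release)"))  # stateless_tests__release_
# is_hex() accepts any string that parses as a base-16 integer.
print(Utils.is_hex("c4e29466de"))    # True
print(Utils.is_hex("not-a-digest"))  # False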

View File

@ -7,7 +7,7 @@ import time
from collections import defaultdict from collections import defaultdict
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, Callable
from github import Github from github import Github
from github.Commit import Commit from github.Commit import Commit
@ -176,7 +176,7 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None:
if not [status for status in statuses if status.context == CI.StatusNames.CI]: if not [status for status in statuses if status.context == CI.StatusNames.CI]:
# This is the case, when some statuses already exist for the check, # This is the case, when some statuses already exist for the check,
# but not the StatusNames.CI. We should create it as pending.
# but not the CI.StatusNames.CI. We should create it as pending.
# W/o pr_info to avoid recursion, and yes, one extra create_ci_report # W/o pr_info to avoid recursion, and yes, one extra create_ci_report
post_commit_status( post_commit_status(
commit, commit,
@ -226,20 +226,20 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str:
f"\n" f"\n"
) )
# group checks by the name to get the worst one per each # group checks by the name to get the worst one per each
grouped_statuses = {} # type: Dict[CI.CheckDescription, CommitStatuses]
grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses]
for status in statuses: for status in statuses:
cd = None cd = None
for c in CI.CHECK_DESCRIPTIONS:
for c in CHECK_DESCRIPTIONS:
if c.match_func(status.context): if c.match_func(status.context):
cd = c cd = c
break break
if cd is None or cd == CI.CHECK_DESCRIPTIONS[-1]:
if cd is None or cd == CHECK_DESCRIPTIONS[-1]:
# This is the case for either non-found description or a fallback # This is the case for either non-found description or a fallback
cd = CI.CheckDescription(
cd = CheckDescription(
status.context,
CI.CHECK_DESCRIPTIONS[-1].description,
CHECK_DESCRIPTIONS[-1].description,
CI.CHECK_DESCRIPTIONS[-1].match_func,
CHECK_DESCRIPTIONS[-1].match_func,
) )
if cd in grouped_statuses: if cd in grouped_statuses:
@ -459,7 +459,7 @@ def trigger_mergeable_check(
set_from_sync: bool = False, set_from_sync: bool = False,
workflow_failed: bool = False, workflow_failed: bool = False,
) -> StatusType:
"""calculate and update StatusNames.MERGEABLE"""
"""calculate and update CI.StatusNames.MERGEABLE"""
required_checks = [status for status in statuses if CI.is_required(status.context)] required_checks = [status for status in statuses if CI.is_required(status.context)]
mergeable_status = None mergeable_status = None
@ -536,3 +536,188 @@ def update_upstream_sync_status(
get_commit_filtered_statuses(last_synced_upstream_commit), get_commit_filtered_statuses(last_synced_upstream_commit),
set_from_sync=True, set_from_sync=True,
) )
@dataclass
class CheckDescription:
name: str
description: str # the check descriptions, will be put into the status table
match_func: Callable[[str], bool] # the function to check vs the commit status
def __hash__(self) -> int:
return hash(self.name + self.description)
CHECK_DESCRIPTIONS = [
CheckDescription(
CI.StatusNames.PR_CHECK,
"Checks correctness of the PR's body",
lambda x: x == "PR Check",
),
CheckDescription(
CI.StatusNames.SYNC,
"If it fails, ask a maintainer for help",
lambda x: x == CI.StatusNames.SYNC,
),
CheckDescription(
"AST fuzzer",
"Runs randomly generated queries to catch program errors. "
"The build type is optionally given in parenthesis. "
"If it fails, ask a maintainer for help",
lambda x: x.startswith("AST fuzzer"),
),
CheckDescription(
CI.JobNames.BUGFIX_VALIDATE,
"Checks that either a new test (functional or integration) or there "
"some changed tests that fail with the binary built on master branch",
lambda x: x == CI.JobNames.BUGFIX_VALIDATE,
),
CheckDescription(
CI.StatusNames.CI,
"A meta-check that indicates the running CI. Normally, it's in <b>success</b> or "
"<b>pending</b> state. The failed status indicates some problems with the PR",
lambda x: x == "CI running",
),
CheckDescription(
"Builds",
"Builds ClickHouse in various configurations for use in further steps. "
"You have to fix the builds that fail. Build logs often has enough "
"information to fix the error, but you might have to reproduce the failure "
"locally. The <b>cmake</b> options can be found in the build log, grepping for "
'<b>cmake</b>. Use these options and follow the <a href="'
'https://clickhouse.com/docs/en/development/build">general build process</a>',
lambda x: x.startswith("ClickHouse") and x.endswith("build check"),
),
CheckDescription(
"Compatibility check",
"Checks that <b>clickhouse</b> binary runs on distributions with old libc "
"versions. If it fails, ask a maintainer for help",
lambda x: x.startswith("Compatibility check"),
),
CheckDescription(
CI.JobNames.DOCKER_SERVER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker server"),
),
CheckDescription(
CI.JobNames.DOCKER_KEEPER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker keeper"),
),
CheckDescription(
CI.JobNames.DOCS_CHECK,
"Builds and tests the documentation",
lambda x: x == CI.JobNames.DOCS_CHECK,
),
CheckDescription(
CI.JobNames.FAST_TEST,
"Normally this is the first check that is ran for a PR. It builds ClickHouse "
'and runs most of <a href="https://clickhouse.com/docs/en/development/tests'
'#functional-tests">stateless functional tests</a>, '
"omitting some. If it fails, further checks are not started until it is fixed. "
"Look at the report to see which tests fail, then reproduce the failure "
'locally as described <a href="https://clickhouse.com/docs/en/development/'
'tests#functional-test-locally">here</a>',
lambda x: x == CI.JobNames.FAST_TEST,
),
CheckDescription(
"Flaky tests",
"Checks if new added or modified tests are flaky by running them repeatedly, "
"in parallel, with more randomization. Functional tests are run 100 times "
"with address sanitizer, and additional randomization of thread scheduling. "
"Integration tests are run up to 10 times. If at least once a new test has "
"failed, or was too long, this check will be red. We don't allow flaky tests, "
'read <a href="https://clickhouse.com/blog/decorating-a-christmas-tree-with-'
'the-help-of-flaky-tests/">the doc</a>',
lambda x: "tests flaky check" in x,
),
CheckDescription(
"Install packages",
"Checks that the built packages are installable in a clear environment",
lambda x: x.startswith("Install packages ("),
),
CheckDescription(
"Integration tests",
"The integration tests report. In parenthesis the package type is given, "
"and in square brackets are the optional part/total tests",
lambda x: x.startswith("Integration tests ("),
),
CheckDescription(
CI.StatusNames.MERGEABLE,
"Checks if all other necessary checks are successful",
lambda x: x == CI.StatusNames.MERGEABLE,
),
CheckDescription(
"Performance Comparison",
"Measure changes in query performance. The performance test report is "
'described in detail <a href="https://github.com/ClickHouse/ClickHouse/tree'
'/master/docker/test/performance-comparison#how-to-read-the-report">here</a>. '
"In square brackets are the optional part/total tests",
lambda x: x.startswith("Performance Comparison"),
),
CheckDescription(
"Push to Dockerhub",
"The check for building and pushing the CI related docker images to docker hub",
lambda x: x.startswith("Push") and "to Dockerhub" in x,
),
CheckDescription(
"Sqllogic",
"Run clickhouse on the "
'<a href="https://www.sqlite.org/sqllogictest">sqllogic</a> '
"test set against sqlite and checks that all statements are passed",
lambda x: x.startswith("Sqllogic test"),
),
CheckDescription(
"SQLancer",
"Fuzzing tests that detect logical bugs with "
'<a href="https://github.com/sqlancer/sqlancer">SQLancer</a> tool',
lambda x: x.startswith("SQLancer"),
),
CheckDescription(
"Stateful tests",
"Runs stateful functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateful tests ("),
),
CheckDescription(
"Stateless tests",
"Runs stateless functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateless tests ("),
),
CheckDescription(
"Stress test",
"Runs stateless functional tests concurrently from several clients to detect "
"concurrency-related errors",
lambda x: x.startswith("Stress test ("),
),
CheckDescription(
CI.JobNames.STYLE_CHECK,
"Runs a set of checks to keep the code style clean. If some of tests failed, "
"see the related log from the report",
lambda x: x == CI.JobNames.STYLE_CHECK,
),
CheckDescription(
"Unit tests",
"Runs the unit tests for different release types",
lambda x: x.startswith("Unit tests ("),
),
CheckDescription(
"Upgrade check",
"Runs stress tests on server version from last release and then tries to "
"upgrade it to the version from the PR. It checks if the new server can "
"successfully startup without any errors, crashes or sanitizer asserts",
lambda x: x.startswith("Upgrade check ("),
),
CheckDescription(
"ClickBench",
"Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table",
lambda x: x.startswith("ClickBench"),
),
CheckDescription(
"Fallback for unknown",
"There's no description for the check yet, please add it to "
"tests/ci/ci_config.py:CHECK_DESCRIPTIONS",
lambda x: True,
),
]
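
For orientation, a small sketch of how a commit status context is matched against this table inside commit_status_helper.py (the context string is hypothetical):

context = "Stateless tests (release)"
cd = next(c for c in CHECK_DESCRIPTIONS if c.match_func(context))
print(cd.name)  # "Stateless tests"; the "Fallback for unknown" entry only wins when nothing else matches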

View File

@ -23,7 +23,7 @@ from commit_status_helper import (
from get_robot_token import get_best_robot_token from get_robot_token import get_best_robot_token
from github_helper import GitHub, NamedUser, PullRequest, Repository from github_helper import GitHub, NamedUser, PullRequest, Repository
from pr_info import PRInfo from pr_info import PRInfo
from report import SUCCESS, FAILURE
from report import SUCCESS
from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY
from synchronizer_utils import SYNC_BRANCH_PREFIX from synchronizer_utils import SYNC_BRANCH_PREFIX
from ci_config import CI from ci_config import CI
@ -248,23 +248,27 @@ def main():
repo = gh.get_repo(args.repo) repo = gh.get_repo(args.repo)
if args.set_ci_status: if args.set_ci_status:
CI.GH.print_workflow_results()
# set Mergeable check status and exit # set Mergeable check status and exit
assert args.wf_status in (FAILURE, SUCCESS)
commit = get_commit(gh, args.pr_info.sha)
statuses = get_commit_filtered_statuses(commit)
has_failed_statuses = False
has_native_failed_status = False
for status in statuses:
print(f"Check status [{status.context}], [{status.state}]")
if CI.is_required(status.context) and status.state != SUCCESS:
print(f"WARNING: Failed status [{status.context}], [{status.state}]")
if (
CI.is_required(status.context)
and status.state != SUCCESS
and status.context != CI.StatusNames.SYNC
):
print(
f"WARNING: Not success status [{status.context}], [{status.state}]"
)
has_failed_statuses = True
if status.context != CI.StatusNames.SYNC:
has_native_failed_status = True
if args.wf_status == SUCCESS or has_failed_statuses:
# set Mergeable check if workflow is successful (green)
workflow_ok = CI.is_workflow_ok()
if workflow_ok or has_failed_statuses:
# set Mergeable Check if workflow is successful (green)
# or if we have GH statuses with failures (red)
# to avoid false-green on a died runner
state = trigger_mergeable_check(
@ -283,7 +287,7 @@ def main():
print( print(
"Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status" "Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
) )
if args.wf_status == SUCCESS and not has_native_failed_status:
if workflow_ok and not has_failed_statuses:
sys.exit(0)
else:
sys.exit(1)
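
To spell out the new gating, a rough standalone sketch of the exit decision above (the helper name is illustrative, not part of merge_pr.py):

def set_ci_status_exit_code(workflow_ok: bool, has_failed_statuses: bool) -> int:
    # Mergeable Check is recalculated when the workflow is green, or when there are
    # red required statuses, so a died runner cannot produce a false green.
    return 0 if workflow_ok and not has_failed_statuses else 1

print(set_ci_status_exit_code(True, False))   # 0: green workflow, no red statuses
print(set_ci_status_exit_code(True, True))    # 1: a required status is red
print(set_ci_status_exit_code(False, False))  # 1: workflow not ok (e.g. died runner)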

View File

@ -22,7 +22,6 @@ from typing import (
from build_download_helper import get_gh_api from build_download_helper import get_gh_api
from ci_config import CI from ci_config import CI
from ci_utils import normalize_string
from env_helper import REPORT_PATH, GITHUB_WORKSPACE from env_helper import REPORT_PATH, GITHUB_WORKSPACE
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -622,7 +621,7 @@ class BuildResult:
def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path: def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path:
path = Path(directory) / self.get_report_name( path = Path(directory) / self.get_report_name(
self.build_name, self.pr_number or normalize_string(self.head_ref)
self.build_name, self.pr_number or CI.Utils.normalize_string(self.head_ref)
) )
path.write_text( path.write_text(
json.dumps( json.dumps(

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging import logging
import re
import sys import sys
from typing import Tuple from typing import Tuple
@ -16,7 +17,6 @@ from commit_status_helper import (
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token from get_robot_token import get_best_robot_token
from ci_config import CI from ci_config import CI
from ci_utils import Utils
from pr_info import PRInfo from pr_info import PRInfo
from report import FAILURE, PENDING, SUCCESS, StatusType from report import FAILURE, PENDING, SUCCESS, StatusType
@ -25,12 +25,144 @@ TRUSTED_ORG_IDS = {
54801242, # clickhouse 54801242, # clickhouse
} }
TRUSTED_CONTRIBUTORS = {
e.lower()
for e in [
"amosbird",
"azat", # SEMRush
"bharatnc", # Many contributions.
"cwurm", # ClickHouse, Inc
"den-crane", # Documentation contributor
"ildus", # adjust, ex-pgpro
"nvartolomei", # Seasoned contributor, CloudFlare
"taiyang-li",
"ucasFL", # Amos Bird's friend
"thomoco", # ClickHouse, Inc
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse, Inc
"tsolodov", # ClickHouse, Inc
"justindeguzman", # ClickHouse, Inc
"XuJia0210", # ClickHouse, Inc
]
}
OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK} OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK}
PR_CHECK = "PR Check" PR_CHECK = "PR Check"
LABEL_CATEGORIES = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
"pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
}
CATEGORY_TO_LABEL = {
c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
}
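
As a quick illustration of how the mapping above is used when labeling PRs:

print(CATEGORY_TO_LABEL["New Feature"])        # pr-feature
print(CATEGORY_TO_LABEL["Build Improvement"])  # pr-build
print(CATEGORY_TO_LABEL["Improvement"])        # pr-improvement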
def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]:
return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif "(changelog entry is not required)" in category:
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
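
And a hypothetical PR body run through the helper above (the body text is made up, not the real template):

body = """### Changelog category (leave one):
- Improvement

### Changelog entry (a user-readable short description of the changes):
Make the thing measurably faster.
"""
error, category = check_pr_description(body, "ClickHouse/ClickHouse")
print(error)     # "" - the body is well-formed
print(category)  # "Improvement", which maps to the pr-improvement label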
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
if pr_user_login.lower() in CI.TRUSTED_CONTRIBUTORS:
if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
logging.info("User '%s' is trusted", pr_user_login) logging.info("User '%s' is trusted", pr_user_login)
return True return True
@ -92,22 +224,20 @@ def main():
commit = get_commit(gh, pr_info.sha) commit = get_commit(gh, pr_info.sha)
status = SUCCESS # type: StatusType status = SUCCESS # type: StatusType
description_error, category = Utils.check_pr_description(
pr_info.body, GITHUB_REPOSITORY
)
description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY)
pr_labels_to_add = [] pr_labels_to_add = []
pr_labels_to_remove = [] pr_labels_to_remove = []
if (
category in CI.CATEGORY_TO_LABEL
and CI.CATEGORY_TO_LABEL[category] not in pr_info.labels
category in CATEGORY_TO_LABEL
and CATEGORY_TO_LABEL[category] not in pr_info.labels
):
pr_labels_to_add.append(CI.CATEGORY_TO_LABEL[category])
pr_labels_to_add.append(CATEGORY_TO_LABEL[category])
for label in pr_info.labels:
if (
label in CI.CATEGORY_TO_LABEL.values()
and category in CI.CATEGORY_TO_LABEL
and label != CI.CATEGORY_TO_LABEL[category]
label in CATEGORY_TO_LABEL.values()
and category in CATEGORY_TO_LABEL
and label != CATEGORY_TO_LABEL[category]
):
pr_labels_to_remove.append(label)

View File

@ -9,7 +9,7 @@ from ci_settings import CiSettings
from pr_info import PRInfo, EventType from pr_info import PRInfo, EventType
from s3_helper import S3Helper from s3_helper import S3Helper
from ci_cache import CiCache from ci_cache import CiCache
from ci_utils import normalize_string
from ci_utils import Utils
_TEST_EVENT_JSON = {"dummy": "dummy"} _TEST_EVENT_JSON = {"dummy": "dummy"}
@ -55,7 +55,7 @@ class TestCIConfig(unittest.TestCase):
if CI.JOB_CONFIGS[job].job_name_keyword: if CI.JOB_CONFIGS[job].job_name_keyword:
self.assertTrue( self.assertTrue(
CI.JOB_CONFIGS[job].job_name_keyword.lower()
in normalize_string(job),
in Utils.normalize_string(job),
f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]",
) )
@ -291,7 +291,9 @@ class TestCIConfig(unittest.TestCase):
assert tag_config assert tag_config
set_jobs = tag_config.run_jobs set_jobs = tag_config.run_jobs
for job in set_jobs: for job in set_jobs:
if any(k in normalize_string(job) for k in settings.exclude_keywords):
if any(
k in Utils.normalize_string(job) for k in settings.exclude_keywords
):
continue continue
expected_jobs_to_do.append(job) expected_jobs_to_do.append(job)
for job, config in CI.JOB_CONFIGS.items(): for job, config in CI.JOB_CONFIGS.items():
@ -303,12 +305,12 @@ class TestCIConfig(unittest.TestCase):
# expected to run all builds jobs # expected to run all builds jobs
expected_jobs_to_do.append(job) expected_jobs_to_do.append(job)
if not any( if not any(
keyword in normalize_string(job) keyword in Utils.normalize_string(job)
for keyword in settings.include_keywords for keyword in settings.include_keywords
): ):
continue continue
if any( if any(
keyword in normalize_string(job) keyword in Utils.normalize_string(job)
for keyword in settings.exclude_keywords for keyword in settings.exclude_keywords
): ):
continue continue
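
For context on the keyword filtering above: a rough stand-in for `Utils.normalize_string` (an assumption for illustration; the real helper lives in tests/ci/ci_utils.py and its exact replacement set may differ) and how the include/exclude checks use it:

def normalize_string(s: str) -> str:
    # Assumed behaviour: lowercase and turn common separators into underscores.
    res = s.lower()
    for ch in (" ", "(", ")", ",", "/", "-"):
        res = res.replace(ch, "_")
    return res

job = "Stateless tests (asan)"   # hypothetical job name
include_keywords = ["stateless"]
exclude_keywords = ["tsan"]

normalized = normalize_string(job)  # -> "stateless_tests__asan_"
assert any(k in normalized for k in include_keywords)      # job is selected
assert not any(k in normalized for k in exclude_keywords)  # job is not excluded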

View File

@@ -1,7 +1,8 @@
<clickhouse>
    <rocksdb>
        <options>
-            <info_log_level>DEBUG_LEVEL</info_log_level>
+            <!-- https://github.com/ClickHouse/ClickHouse/pull/67274#issuecomment-2255301116 -->
+            <info_log_level>ERROR_LEVEL</info_log_level>
        </options>
    </rocksdb>
</clickhouse>

View File

@@ -19,6 +19,7 @@ services:
        ldapsearch -x -H ldap://localhost:$$LDAP_PORT_NUMBER -D $$LDAP_ADMIN_DN -w $$LDAP_ADMIN_PASSWORD -b $$LDAP_ROOT
        | grep -c -E "member: cn=j(ohn|ane)doe"
        | grep 2 >> /dev/null
+        && cat /run/slapd/slapd.pid
      interval: 10s
      retries: 10
      timeout: 2s

View File

@@ -4484,7 +4484,7 @@ class ClickHouseInstance:
        use_old_analyzer = os.environ.get("CLICKHOUSE_USE_OLD_ANALYZER") is not None
        # If specific version was used there can be no
-        # allow_experimental_analyzer setting, so do this only if it was
+        # enable_analyzer setting, so do this only if it was
        # explicitly requested.
        if self.tag:
            use_old_analyzer = False

View File

@@ -1,4 +1,5 @@
import uuid
+import time
import pytest
from helpers.cluster import ClickHouseCluster
@@ -58,12 +59,12 @@ ORDER BY h;"""
        == TSV([["backward", "true"], ["current", "true"]])
    )
-    # Should be enabled everywhere
-    analyzer_enabled = current.query(
+    # Should be enabled explicitly on the old instance.
+    analyzer_enabled = backward.query(
        f"""
SELECT
    DISTINCT Settings['allow_experimental_analyzer']
-FROM clusterAllReplicas('test_cluster_mixed', system.query_log)
+FROM system.query_log
WHERE initial_query_id = '{query_id}';"""
    )
@@ -78,6 +79,8 @@ WHERE initial_query_id = '{query_id}';"""
    current.query("SYSTEM FLUSH LOGS")
    backward.query("SYSTEM FLUSH LOGS")
+    # The old version doesn't know about the alias.
+    # For this we will ask about the old experimental name.
    assert (
        backward.query(
            """
@@ -98,3 +101,26 @@ WHERE initial_query_id = '{query_id}';"""
    )
    assert TSV(analyzer_enabled) == TSV("0")
+    # Only new version knows about the alias
+    # and it will send the old setting `allow_experimental_analyzer`
+    # to the remote server.
+    query_id = str(uuid.uuid4())
+    current.query(
+        "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables) SETTINGS enable_analyzer = 1;",
+        query_id=query_id,
+    )
+    current.query("SYSTEM FLUSH LOGS")
+    backward.query("SYSTEM FLUSH LOGS")
+    # Should be disabled explicitly everywhere.
+    analyzer_enabled = current.query(
+        f"""
+SELECT
+    DISTINCT Settings['allow_experimental_analyzer']
+FROM system.query_log
+WHERE initial_query_id = '{query_id}';"""
+    )
+    assert TSV(analyzer_enabled) == TSV("1")
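
The added block relies on `enable_analyzer` and `allow_experimental_analyzer` being two names for the same setting, which is why the old name can still be read back from system.query_log. A small helper in the spirit of the checks above (illustrative only; it reuses the node `query`/`query_id` helpers from this test and makes no claim about the exact value a given server version records):

def analyzer_setting_in_log(node, query_id: str) -> str:
    # Read back whatever value the server recorded for the old setting name
    # for the query with the given initial_query_id.
    node.query("SYSTEM FLUSH LOGS")
    return node.query(
        f"""
SELECT DISTINCT Settings['allow_experimental_analyzer']
FROM system.query_log
WHERE initial_query_id = '{query_id}';"""
    ).strip()

With the nodes above, analyzer_setting_in_log(current, query_id) is the same lookup the assertions perform inline.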

View File

@@ -31,6 +31,9 @@ def started_cluster():
def test_distributed_type_object(started_cluster):
+    node1.query("TRUNCATE TABLE local_table")
+    node2.query("TRUNCATE TABLE local_table")
    node1.query(
        'INSERT INTO local_table FORMAT JSONEachRow {"id": 1, "data": {"k1": 10}}'
    )
@@ -89,7 +92,7 @@ def test_distributed_type_object(started_cluster):
    assert (
        TSV(
            node1.query(
-                "SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM dist_table ORDER BY id SETTINGS allow_experimental_analyzer = 0"
+                "SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM dist_table ORDER BY id SETTINGS enable_analyzer = 0"
            )
        )
        == expected

View File

@@ -1,7 +1,7 @@
<clickhouse>
    <profiles>
        <default>
-            <allow_experimental_analyzer>1</allow_experimental_analyzer>
+            <enable_analyzer>1</enable_analyzer>
            <allow_experimental_parallel_reading_from_replicas>1</allow_experimental_parallel_reading_from_replicas>
            <cluster_for_parallel_replicas>default</cluster_for_parallel_replicas>
            <max_parallel_replicas>100</max_parallel_replicas>

View File

@@ -459,7 +459,7 @@ def test_show_profiles():
    query_possible_response = [
        "CREATE SETTINGS PROFILE `default`\n",
-        "CREATE SETTINGS PROFILE `default` SETTINGS allow_experimental_analyzer = true\n",
+        "CREATE SETTINGS PROFILE `default` SETTINGS enable_analyzer = true\n",
    ]
    assert (
        instance.query("SHOW CREATE SETTINGS PROFILE default")
@@ -470,7 +470,7 @@ def test_show_profiles():
        "CREATE SETTINGS PROFILE `default`\n"
        "CREATE SETTINGS PROFILE `readonly` SETTINGS readonly = 1\n"
        "CREATE SETTINGS PROFILE `xyz`\n",
-        "CREATE SETTINGS PROFILE `default` SETTINGS allow_experimental_analyzer = true\n"
+        "CREATE SETTINGS PROFILE `default` SETTINGS enable_analyzer = true\n"
        "CREATE SETTINGS PROFILE `readonly` SETTINGS readonly = 1\n"
        "CREATE SETTINGS PROFILE `xyz`\n",
    ]
@@ -482,7 +482,7 @@ def test_show_profiles():
        "CREATE SETTINGS PROFILE `xyz`\n"
    )
    expected_access_analyzer = (
-        "CREATE SETTINGS PROFILE `default` SETTINGS allow_experimental_analyzer = true\n"
+        "CREATE SETTINGS PROFILE `default` SETTINGS enable_analyzer = true\n"
        "CREATE SETTINGS PROFILE `readonly` SETTINGS readonly = 1\n"
        "CREATE SETTINGS PROFILE `xyz`\n"
    )

View File

@@ -71,6 +71,8 @@ function configure
{
    # Use the new config for both servers, so that we can change it in a PR.
    rm right/config/config.d/text_log.xml ||:
+    # backups disk uses absolute path, and this overlaps between servers, that could lead to errors
+    rm right/config/config.d/backups.xml ||:
    cp -rv right/config left ||:
    # Start a temporary server to rename the tables

View File

@@ -28,8 +28,8 @@ RENAME TABLE set2 TO set;
SELECT arrayJoin(['Hello', 'test', 'World', 'world', 'abc', 'xyz']) AS s WHERE s IN set;
create table tab (x String) engine = MergeTree order by x as select 'Hello';
-SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings allow_experimental_analyzer=0;
-SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings allow_experimental_analyzer=1;
+SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings enable_analyzer=0;
+SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings enable_analyzer=1;
DROP TABLE tab;
DROP TABLE set;

View File

@@ -3,5 +3,6 @@ SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN
SET join_algorithm = 'auto';
SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2;
+-- Just to test that we preserved the old setting name, we use `allow_experimental_analyzer` instead of `enable_analyzer` here.
SET allow_experimental_analyzer = 1;
SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2;

View File

@@ -1,6 +1,6 @@
-- Tags: shard
-set allow_experimental_analyzer = 1;
+set enable_analyzer = 1;
set enable_positional_arguments = 0;
select 40 as z from (select * from system.numbers limit 3) group by z;

View File

@@ -4,10 +4,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1 AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1";
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1234567890123 AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSON";
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toFloat32(1.23) AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSONCompact";
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDate('2010-01-01') AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1";
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDateTime('2010-01-01 01:02:03', 'UTC') AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSON";
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1.1 AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSONCompact";
+${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1 AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1";
+${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1234567890123 AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSON";
+${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toFloat32(1.23) AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSONCompact";
+${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDate('2010-01-01') AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1";
+${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDateTime('2010-01-01 01:02:03', 'UTC') AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSON";
+${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1.1 AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSONCompact";

View File

@@ -1,3 +1,7 @@
+-- Tags: no-msan
+-- ^
+-- makes SELECTs extremely slow sometimes for some reason: "Aggregated. 1000000 to 1 rows (from 7.63 MiB) in 242.829221645 sec."
DROP TABLE IF EXISTS sample_00314_1;
DROP TABLE IF EXISTS sample_00314_2;
DROP TABLE IF EXISTS sample_merge_00314;

View File

@@ -1,4 +1,4 @@
-SET allow_experimental_analyzer = 1;
+SET enable_analyzer = 1;
-- https://github.com/ClickHouse/ClickHouse/issues/45804

View File

@@ -1,6 +1,6 @@
SET any_join_distinct_right_table_keys = 1;
SET joined_subquery_requires_alias = 0;
-SET allow_experimental_analyzer = 1;
+SET enable_analyzer = 1;
select x, y from (select 1 as x, 2 as y, x, y);
select x, y from (select 1 as x, 1 as y, x, y);

View File

@@ -2,7 +2,7 @@
SET output_format_write_statistics = 0;
SET extremes = 1;
-SET allow_experimental_analyzer = 1;
+SET enable_analyzer = 1;
SET output_format_json_quote_64bit_integers = 1;
SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple GROUP BY i0, u0, ip, in, up, arr, tuple WITH TOTALS FORMAT JSON;

View File

@@ -1,4 +1,4 @@
-SET allow_experimental_analyzer = 1;
+SET enable_analyzer = 1;
SET join_use_nulls = 0;
SET any_join_distinct_right_table_keys = 1;

View File

@@ -1,4 +1,4 @@
-SET allow_experimental_analyzer = 1;
+SET enable_analyzer = 1;
with pow(2,2) as four select pow(four, 2), 2 as two, pow(two, 2);
select `pow(four, 2)`, `pow(two, 2)` from (with pow(2,2) as four select pow(four, 2), 2 as two, pow(two, 2));

Some files were not shown because too many files have changed in this diff.