Merge branch 'master' of github.com:ClickHouse/ClickHouse into divanik/add_local_and_azure_iceberg_support

This commit is contained in:
divanik 2024-08-06 11:59:47 +00:00
commit c4e29466de
754 changed files with 2838 additions and 2295 deletions


@ -260,13 +260,18 @@ jobs:
- name: Finish label
if: ${{ !failure() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py --set-ci-status
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'


@ -64,6 +64,7 @@ jobs:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'


@ -103,9 +103,14 @@ jobs:
- name: Check and set merge status
if: ${{ needs.StyleCheck.result == 'success' }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py --set-ci-status
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'


@ -52,6 +52,7 @@ jobs:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'


@ -170,7 +170,11 @@ jobs:
if: ${{ needs.StyleCheck.result == 'success' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 merge_pr.py --set-ci-status
- name: Check Workflow results
uses: ./.github/actions/check_workflow
with:


@ -481,12 +481,10 @@ jobs:
- name: Finish label
if: ${{ !failure() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'


@ -5,12 +5,6 @@ rules:
indentation:
level: warning
indent-sequences: consistent
line-length:
# there are:
# - bash -c "", so this is OK
# - yaml in tests
max: 1000
level: warning
comments:
min-spaces-from-content: 1
document-start: disable


@ -64,6 +64,7 @@
* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)).
* Change how deduplication for Materialized Views works. Fixed a lot of cases, such as: on the destination table, data split into two or more blocks was considered a duplicate when those blocks were inserted in parallel; on the MV destination table, equal blocks are deduplicated, which happens when an MV often produces equal data for different input data due to aggregation; on the MV destination table, equal blocks coming from different MVs are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
* Support reading partitioned DeltaLake data. Infer the DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)).
* In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)).
* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)).

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 01e43568fa9f3f7bf107b2b66c00b286b456f33e
Subproject commit 49ce8a1064dd1ad89117899839bf136365e49e79


@ -1,6 +1,6 @@
option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES})
if (NOT ENABLE_ROCKSDB)
if (NOT ENABLE_ROCKSDB OR NO_SSE3_OR_HIGHER) # assumes SSE4.2 and PCLMUL
message (STATUS "Not using RocksDB")
return()
endif()
@ -39,13 +39,6 @@ if(WITH_ZSTD)
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
add_definitions(-DROCKSDB_PORTABLE)
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
add_definitions(-DHAVE_SSE42)
add_definitions(-DHAVE_PCLMUL)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
set (HAS_ARMV8_CRC 1)
# the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general
@ -91,7 +84,9 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/cache/compressed_secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/secondary_cache_adapter.cc
${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/tiered_secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc
@ -174,9 +169,11 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc
${ROCKSDB_SOURCE_DIR}/db/wide/wide_column_serialization.cc
${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns.cc
${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns_helper.cc
${ROCKSDB_SOURCE_DIR}/db/write_batch.cc
${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc
${ROCKSDB_SOURCE_DIR}/db/write_controller.cc
${ROCKSDB_SOURCE_DIR}/db/write_stall_stats.cc
${ROCKSDB_SOURCE_DIR}/db/write_thread.cc
${ROCKSDB_SOURCE_DIR}/env/composite_env.cc
${ROCKSDB_SOURCE_DIR}/env/env.cc
@ -229,6 +226,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/options/configurable.cc
${ROCKSDB_SOURCE_DIR}/options/customizable.cc
${ROCKSDB_SOURCE_DIR}/options/db_options.cc
${ROCKSDB_SOURCE_DIR}/options/offpeak_time_info.cc
${ROCKSDB_SOURCE_DIR}/options/options.cc
${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
${ROCKSDB_SOURCE_DIR}/options/options_parser.cc
@ -268,6 +266,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/get_context.cc
${ROCKSDB_SOURCE_DIR}/table/iterator.cc
${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/compaction_merging_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc
${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc
@ -309,6 +308,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc
${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc
${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
${ROCKSDB_SOURCE_DIR}/util/data_structure.cc
${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc
${ROCKSDB_SOURCE_DIR}/util/hash.cc
${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc
@ -322,6 +322,8 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/util/string_util.cc
${ROCKSDB_SOURCE_DIR}/util/thread_local.cc
${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc
${ROCKSDB_SOURCE_DIR}/util/udt_util.cc
${ROCKSDB_SOURCE_DIR}/util/write_batch_util.cc
${ROCKSDB_SOURCE_DIR}/util/xxhash.cc
${ROCKSDB_SOURCE_DIR}/utilities/agg_merge/agg_merge.cc
${ROCKSDB_SOURCE_DIR}/utilities/backup/backup_engine.cc
@ -404,12 +406,6 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
build_version.cc) # generated by hand
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
set_source_files_properties(
"${ROCKSDB_SOURCE_DIR}/util/crc32c.cc"
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
list(APPEND SOURCES
"${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc.c"


@ -41,7 +41,7 @@ export FASTTEST_WORKSPACE
export FASTTEST_SOURCE
export FASTTEST_BUILD
export FASTTEST_DATA
export FASTTEST_OUT
export FASTTEST_OUTPUT
export PATH
function ccache_status


@ -28,9 +28,9 @@
</table_function_remote_max_addresses>
<!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
<allow_experimental_analyzer>
<enable_analyzer>
<readonly/>
</allow_experimental_analyzer>
</enable_analyzer>
<!-- This feature is broken, deprecated and will be removed. We don't want more reports about it -->
<allow_experimental_object_type>


@ -11,7 +11,8 @@ function attach_gdb_to_clickhouse()
# explicitly ignore non-fatal signals that are used by server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
# shellcheck disable=SC2016
echo "
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
@ -24,8 +25,11 @@ handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
thread apply all backtrace full
info registers
p "top 1 KiB of the stack:"
p/x *(uint64_t[128]*)"'$sp'"
maintenance info sections
thread apply all backtrace full
disassemble /s
up
disassemble /s


@ -139,9 +139,9 @@ EOL
</table_function_remote_max_addresses>
<!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
<allow_experimental_analyzer>
<enable_analyzer>
<readonly/>
</allow_experimental_analyzer>
</enable_analyzer>
<!-- This feature is broken, deprecated and will be removed. We don't want more reports about it -->
<allow_experimental_object_type>


@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.5.78-stable (0138248cb62) FIXME as compared to v24.5.4.49-stable (63b760955a0)
#### Improvement
* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`); fix an incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add the missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` to the schema inference cache because they can change the resulting schema. This prevents incorrect schema inference results when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL)`. The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), only with the analyzer disabled. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it were a DROP DATABASE query; this is now fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).


@ -118,7 +118,7 @@ And the result of interpreting the `INSERT SELECT` query is a "completed" `Query
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow for modular transformations of the query.
To address current problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline` called `QueryTree`. It is not production-ready yet, but it can be tested with the `allow_experimental_analyzer` flag.
To address problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` has been developed. This is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional layer of abstraction between `AST` and `QueryPipeline`, called `QueryTree`. It is fully ready for use in production, but, just in case, it can be turned off by setting the `enable_analyzer` setting to `false`.
## Functions {#functions}


@ -123,7 +123,7 @@ To ensure consistent and expected results, especially when migrating old queries
In the new version of the analyzer, the rules for determining the common supertype for columns specified in the `USING` clause have been standardized to produce more predictable outcomes, especially when dealing with type modifiers like `LowCardinality` and `Nullable`.
- `LowCardinality(T)` and `T`: When a column of type `LowCardinality(T)` is joined with a column of type `T`, the resulting common supertype will be `T`, effectively discarding the `LowCardinality` modifier.
- `Nullable(T)` and `T`: When a column of type `Nullable(T)` is joined with a column of type `T`, the resulting common supertype will be `Nullable(T)`, ensuring that the nullable property is preserved.
**Example:**
@ -144,7 +144,7 @@ During projection names computation, aliases are not substituted.
SELECT
1 + 1 AS x,
x + 1
SETTINGS allow_experimental_analyzer = 0
SETTINGS enable_analyzer = 0
FORMAT PrettyCompact
┌─x─┬─plus(plus(1, 1), 1)─┐
@ -154,7 +154,7 @@ FORMAT PrettyCompact
SELECT
1 + 1 AS x,
x + 1
SETTINGS allow_experimental_analyzer = 1
SETTINGS enable_analyzer = 1
FORMAT PrettyCompact
┌─x─┬─plus(x, 1)─┐
@ -177,7 +177,7 @@ SELECT toTypeName(if(0, [2, 3, 4], 'String'))
### Heterogeneous clusters
The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `allow_experimental_analyzer` setting values.
The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `enable_analyzer` setting values.
### Mutations are interpreted by previous analyzer


@ -4051,7 +4051,7 @@ Rewrite aggregate functions with if expression as argument when logically equiva
For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.
:::note
Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
Supported only with experimental analyzer (`enable_analyzer = 1`).
:::
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}


@ -115,7 +115,7 @@ ClickHouse is a full-fledged column-oriented DBMS. Data
`InterpreterSelectQuery` uses the `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is written rather messily and should be rewritten: the various query transformations and optimizations should be extracted into separate classes to allow modular transformations of queries.
To address the current problems that exist in the interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional layer of abstraction between `AST` and `QueryPipeline`, called `QueryTree`. It is not yet ready for production use, but it can be tested with the `allow_experimental_analyzer` flag.
To address the problems that exist in the interpreters, a new `InterpreterSelectQueryAnalyzer` has been developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional layer of abstraction between `AST` and `QueryPipeline`, called `QueryTree`. It is fully ready for production use, but, just in case, it can be turned off by setting the `enable_analyzer` setting to `false`.
## Functions {#functions}


@ -522,6 +522,9 @@
const current_url = new URL(window.location);
const opened_locally = location.protocol == 'file:';
/// Run query instantly after page is loaded if the run parameter is present.
const run_immediately = current_url.searchParams.has("run");
const server_address = current_url.searchParams.get('url');
if (server_address) {
document.getElementById('url').value = server_address;
@ -599,6 +602,9 @@
const title = "ClickHouse Query: " + query;
let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
if (run_immediately) {
history_url += "&run=1";
}
if (server_address != location.origin) {
/// Save server's address in URL if it's not identical to the address of the play UI.
history_url += '&url=' + encodeURIComponent(server_address);
@ -1160,6 +1166,10 @@
});
}
if (run_immediately) {
post();
}
document.getElementById('toggle-light').onclick = function() {
setColorTheme('light', true);
}


@ -1029,7 +1029,8 @@ bool Dwarf::findLocation(
const LocationInfoMode mode,
CompilationUnit & cu,
LocationInfo & info,
std::vector<SymbolizedFrame> & inline_frames) const
std::vector<SymbolizedFrame> & inline_frames,
bool assume_in_cu_range) const
{
Die die = getDieAtOffset(cu, cu.first_die);
// Partial compilation unit (DW_TAG_partial_unit) is not supported.
@ -1041,6 +1042,11 @@ bool Dwarf::findLocation(
std::optional<std::string_view> main_file_name;
std::optional<uint64_t> base_addr_cu;
std::optional<uint64_t> low_pc;
std::optional<uint64_t> high_pc;
std::optional<bool> is_high_pc_addr;
std::optional<uint64_t> range_offset;
forEachAttribute(cu, die, [&](const Attribute & attr)
{
switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)
@ -1058,18 +1064,47 @@ bool Dwarf::findLocation(
// File name of main file being compiled
main_file_name = std::get<std::string_view>(attr.attr_value);
break;
case DW_AT_low_pc:
case DW_AT_entry_pc:
// 2.17.1: historically DW_AT_low_pc was used. DW_AT_entry_pc was
// introduced in DWARF3. Support either to determine the base address of
// the CU.
base_addr_cu = std::get<uint64_t>(attr.attr_value);
break;
case DW_AT_ranges:
range_offset = std::get<uint64_t>(attr.attr_value);
break;
case DW_AT_low_pc:
low_pc = std::get<uint64_t>(attr.attr_value);
base_addr_cu = std::get<uint64_t>(attr.attr_value);
break;
case DW_AT_high_pc:
// The value of the DW_AT_high_pc attribute can be
// an address (DW_FORM_addr*) or an offset (DW_FORM_data*).
is_high_pc_addr = attr.spec.form == DW_FORM_addr || //
attr.spec.form == DW_FORM_addrx || //
attr.spec.form == DW_FORM_addrx1 || //
attr.spec.form == DW_FORM_addrx2 || //
attr.spec.form == DW_FORM_addrx3 || //
attr.spec.form == DW_FORM_addrx4;
high_pc = std::get<uint64_t>(attr.attr_value);
break;
}
// Iterate through all attributes until find all above.
return true;
});
/// Check if the address falls inside this unit's address ranges.
if (!assume_in_cu_range && ((low_pc && high_pc) || range_offset))
{
bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc
&& (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, range_offset.value(), cu.addr_size);
if (!pc_match && !range_match)
{
return false;
}
}
if (main_file_name)
{
info.has_main_file = true;
@ -1442,7 +1477,7 @@ bool Dwarf::findAddress(
{
return false;
}
findLocation(address, mode, unit, locationInfo, inline_frames);
findLocation(address, mode, unit, locationInfo, inline_frames, /*assume_in_cu_range*/ true);
return locationInfo.has_file_and_line;
}
else if (mode == LocationInfoMode::FAST)
@ -1471,7 +1506,7 @@ bool Dwarf::findAddress(
{
continue;
}
findLocation(address, mode, unit, locationInfo, inline_frames);
findLocation(address, mode, unit, locationInfo, inline_frames, /*assume_in_cu_range*/ false);
}
return locationInfo.has_file_and_line;
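The range check added above hinges on how `DW_AT_high_pc` is encoded: with a `DW_FORM_addr*` form it is an absolute end address, while with a `DW_FORM_data*` form it is an offset from `DW_AT_low_pc`. A minimal standalone C++ sketch of that interpretation (illustrative only; the helper name is hypothetical, this is not the `Dwarf.cpp` code):

    #include <cstdint>

    // Decide whether `address` belongs to a compilation unit, given its
    // DW_AT_low_pc and DW_AT_high_pc attributes. `high_pc_is_address` says
    // whether DW_AT_high_pc was encoded as an address or as an offset.
    static bool addressInCompilationUnit(uint64_t address, uint64_t low_pc, uint64_t high_pc, bool high_pc_is_address)
    {
        const uint64_t end_address = high_pc_is_address ? high_pc : low_pc + high_pc;
        return address >= low_pc && address < end_address; // the end address is exclusive
    }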


@ -283,7 +283,8 @@ private:
LocationInfoMode mode,
CompilationUnit & cu,
LocationInfo & info,
std::vector<SymbolizedFrame> & inline_frames) const;
std::vector<SymbolizedFrame> & inline_frames,
bool assume_in_cu_range) const;
/**
* Finds a subprogram debugging info entry that contains a given address among


@ -193,8 +193,10 @@
M(ReplicaPartialShutdown, "How many times Replicated table has to deinitialize its state due to session expiration in ZooKeeper. The state is reinitialized every time when ZooKeeper is available again.") \
\
M(SelectedParts, "Number of data parts selected to read from a MergeTree table.") \
M(SelectedPartsTotal, "Number of total data parts before selecting which ones to read from a MergeTree table.") \
M(SelectedRanges, "Number of (non-adjacent) ranges in all data parts selected to read from a MergeTree table.") \
M(SelectedMarks, "Number of marks (index granules) selected to read from a MergeTree table.") \
M(SelectedMarksTotal, "Number of total marks (index granules) before selecting which ones to read from a MergeTree table.") \
M(SelectedRows, "Number of rows SELECTed from all tables.") \
M(SelectedBytes, "Number of bytes (uncompressed; for columns as they stored in memory) SELECTed from all tables.") \
M(RowsReadByMainReader, "Number of rows read from MergeTree tables by the main reader (after PREWHERE step).") \


@ -23,6 +23,7 @@
#if USE_ROCKSDB
#include <rocksdb/table.h>
#include <rocksdb/convenience.h>
#include <rocksdb/statistics.h>
#include <rocksdb/utilities/db_ttl.h>
#endif
@ -88,7 +89,7 @@ static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractCo
if (config.has("keeper_server.rocksdb.options"))
{
auto config_options = getOptionsFromConfig(config, "keeper_server.rocksdb.options");
status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged);
status = rocksdb::GetDBOptionsFromMap({}, merged, config_options, &merged);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' : {}",
@ -98,7 +99,7 @@ static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractCo
if (config.has("rocksdb.column_family_options"))
{
auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options");
status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged);
status = rocksdb::GetColumnFamilyOptionsFromMap({}, merged, column_family_options, &merged);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}", status.ToString());
@ -107,7 +108,7 @@ static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractCo
if (config.has("rocksdb.block_based_table_options"))
{
auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options");
status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options);
status = rocksdb::GetBlockBasedTableOptionsFromMap({}, table_options, block_based_table_options, &table_options);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}", status.ToString());


@ -111,10 +111,13 @@ public:
}
};
using Implementation = testing::Types<TestParam<DB::KeeperMemoryStorage, true>,
TestParam<DB::KeeperMemoryStorage, false>,
TestParam<DB::KeeperRocksStorage, true>,
TestParam<DB::KeeperRocksStorage, false>>;
using Implementation = testing::Types<TestParam<DB::KeeperMemoryStorage, true>
,TestParam<DB::KeeperMemoryStorage, false>
#if USE_ROCKSDB
,TestParam<DB::KeeperRocksStorage, true>
,TestParam<DB::KeeperRocksStorage, false>
#endif
>;
TYPED_TEST_SUITE(CoordinationTest, Implementation);
TYPED_TEST(CoordinationTest, RaftServerConfigParse)


@ -944,8 +944,7 @@ class IColumn;
\
M(Bool, allow_experimental_join_condition, false, "Support join with inequality conditions which involve columns from both left and right table, e.g. t1.y < t2.y.", 0) \
\
/* Analyzer: It's not experimental anymore (WIP) */ \
M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \
M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) ALIAS(enable_analyzer) \
M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather than `t1.b = t2.b`).", 0) \
\
M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
@ -1127,7 +1126,6 @@ class IColumn;
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \
M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \
M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \


@ -338,6 +338,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"input_format_json_max_depth", 1000000, 1000, "It was unlimited in previous versions, but that was unsafe."},
{"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"},
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
}
},
{"24.7",
@ -349,7 +350,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"},
{"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"},
{"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."},
{"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."},
{"collect_hash_table_stats_during_joins", false, true, "New setting."},


@ -151,7 +151,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;


@ -234,7 +234,6 @@ struct FormatSettings
bool infer_incomplete_types_as_strings = true;
bool throw_on_bad_escape_sequence = true;
bool ignore_unnecessary_fields = true;
bool case_insensitive_column_matching = false;
} json{};
struct


@ -42,11 +42,11 @@ public:
{
FunctionArgumentDescriptors mandatory_args{
{"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String"},
{"offset", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8, (U)Int16, (U)Int32, (U)Int64 or Float"},
{"offset", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8/16/32/64 or Float"},
};
FunctionArgumentDescriptors optional_args{
{"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8, (U)Int16, (U)Int32, (U)Int64 or Float"},
{"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "(U)Int8/16/32/64 or Float"},
};
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);


@ -1233,6 +1233,12 @@ public:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be ColumnUInt8 or ColumnConstUInt8.", arg_cond.column->getName(), getName());
/// If result is Variant, always use generic implementation.
/// Using typed implementations may lead to incorrect result column type when
/// resulting Variant is created by use_variant_when_no_common_type.
if (isVariant(result_type))
return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type);
auto call = [&](const auto & types) -> bool
{
using Types = std::decay_t<decltype(types)>;


@ -100,6 +100,13 @@ bool isConstantFromScalarSubquery(const ActionsDAG::Node * node)
}
bool ActionsDAG::Node::isDeterministic() const
{
bool deterministic_if_func = type != ActionType::FUNCTION || function_base->isDeterministic();
bool deterministic_if_const = type != ActionType::COLUMN || is_deterministic_constant;
return deterministic_if_func && deterministic_if_const;
}
void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const
{
map.add("Node Type", magic_enum::enum_name(type));
@ -318,7 +325,6 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl(
node.function_base = function_base;
node.result_type = result_type;
node.function = node.function_base->prepare(arguments);
node.is_deterministic = node.function_base->isDeterministic();
/// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function.
if (node.function_base->isSuitableForConstantFolding())
@ -536,64 +542,99 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_consta
void ActionsDAG::removeUnusedActions(const std::unordered_set<const Node *> & used_inputs, bool allow_constant_folding)
{
std::unordered_set<const Node *> visited_nodes;
std::stack<Node *> stack;
for (const auto * node : outputs)
{
visited_nodes.insert(node);
stack.push(const_cast<Node *>(node));
}
NodeRawConstPtrs roots;
roots.reserve(outputs.size() + used_inputs.size());
roots = outputs;
for (auto & node : nodes)
{
/// We cannot remove arrayJoin because it changes the number of rows.
bool is_array_join = node.type == ActionType::ARRAY_JOIN;
if (is_array_join && !visited_nodes.contains(&node))
{
visited_nodes.insert(&node);
stack.push(&node);
}
if (node.type == ActionType::ARRAY_JOIN)
roots.push_back(&node);
if (node.type == ActionType::INPUT && used_inputs.contains(&node))
visited_nodes.insert(&node);
roots.push_back(&node);
}
while (!stack.empty())
std::unordered_set<const Node *> required_nodes;
std::unordered_set<const Node *> non_deterministic_nodes;
struct Frame
{
auto * node = stack.top();
stack.pop();
const ActionsDAG::Node * node;
size_t next_child_to_visit = 0;
};
/// Constant folding.
if (allow_constant_folding && !node->children.empty() && node->column && isColumnConst(*node->column))
std::stack<Frame> stack;
enum class VisitStage { NonDeterministic, Required };
for (auto stage : {VisitStage::NonDeterministic, VisitStage::Required})
{
required_nodes.clear();
for (const auto * root : roots)
{
node->type = ActionsDAG::ActionType::COLUMN;
for (const auto & child : node->children)
if (!required_nodes.contains(root))
{
if (!child->is_deterministic)
required_nodes.insert(root);
stack.push({.node = root});
}
while (!stack.empty())
{
auto & frame = stack.top();
auto * node = const_cast<Node *>(frame.node);
while (frame.next_child_to_visit < node->children.size())
{
node->is_deterministic = false;
break;
const auto * child = node->children[frame.next_child_to_visit];
++frame.next_child_to_visit;
if (!required_nodes.contains(child))
{
required_nodes.insert(child);
stack.push({.node = child});
break;
}
}
if (stack.top().node != node)
continue;
stack.pop();
if (stage == VisitStage::Required)
continue;
if (!node->isDeterministic())
non_deterministic_nodes.insert(node);
else
{
for (const auto * child : node->children)
{
if (non_deterministic_nodes.contains(child))
{
non_deterministic_nodes.insert(node);
break;
}
}
}
/// Constant folding.
if (allow_constant_folding && !node->children.empty()
&& node->column && isColumnConst(*node->column))
{
node->type = ActionsDAG::ActionType::COLUMN;
node->children.clear();
node->is_deterministic_constant = !non_deterministic_nodes.contains(node);
}
}
node->children.clear();
}
for (const auto * child : node->children)
{
if (!visited_nodes.contains(child))
{
stack.push(const_cast<Node *>(child));
visited_nodes.insert(child);
}
}
}
std::erase_if(nodes, [&](const Node & node) { return !visited_nodes.contains(&node); });
std::erase_if(inputs, [&](const Node * node) { return !visited_nodes.contains(node); });
std::erase_if(nodes, [&](const Node & node) { return !required_nodes.contains(&node); });
std::erase_if(inputs, [&](const Node * node) { return !required_nodes.contains(node); });
}
@ -1379,7 +1420,7 @@ bool ActionsDAG::trivial() const
void ActionsDAG::assertDeterministic() const
{
for (const auto & node : nodes)
if (!node.is_deterministic)
if (!node.isDeterministic())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name);
}
@ -1387,7 +1428,7 @@ void ActionsDAG::assertDeterministic() const
bool ActionsDAG::hasNonDeterministic() const
{
for (const auto & node : nodes)
if (!node.is_deterministic)
if (!node.isDeterministic())
return true;
return false;
}


@ -80,13 +80,15 @@ public:
ExecutableFunctionPtr function;
/// If function is a compiled statement.
bool is_function_compiled = false;
/// It is deterministic (See IFunction::isDeterministic).
/// This property is kept after constant folding of non-deterministic functions like 'now', 'today'.
bool is_deterministic = true;
/// It is a constant calculated from deterministic functions (See IFunction::isDeterministic).
/// This property is kept after constant folding of non-deterministic functions like 'now', 'today'.
bool is_deterministic_constant = true;
/// For COLUMN node and propagated constants.
ColumnPtr column;
/// If the result of this node is deterministic. Checks only this node, not the subtree.
bool isDeterministic() const;
void toTree(JSONBuilder::JSONMap & map) const;
};
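To make the comments above concrete: `is_deterministic_constant` only matters for COLUMN nodes (constants that may have been folded from a non-deterministic subtree, e.g. one containing `now()`), while a FUNCTION node takes its deterministic property from the function itself. A simplified, hypothetical sketch of how the two flags combine, mirroring the `ActionsDAG::Node::isDeterministic()` logic shown earlier (not the actual class):

    // Simplified model of an expression-DAG node; names are illustrative only.
    struct NodeSketch
    {
        enum class Type { Input, Column, Function };
        Type type = Type::Input;
        bool function_is_deterministic = true;  // relevant for Function nodes (false for now(), today(), ...)
        bool is_deterministic_constant = true;  // relevant for Column nodes folded from non-deterministic subtrees

        bool isDeterministic() const
        {
            bool deterministic_if_func = type != Type::Function || function_is_deterministic;
            bool deterministic_if_const = type != Type::Column || is_deterministic_constant;
            return deterministic_if_func && deterministic_if_const;
        }
    };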


@ -326,6 +326,8 @@ std::vector<FileSegment::Range> FileCache::splitRange(size_t offset, size_t size
/// ^ ^
/// right offset aligned_right_offset
/// [_________] <-- last cached file segment, e.g. we have uncovered suffix of the requested range
/// ^
/// last_file_segment_right_offset
/// [________________]
/// size
/// [____________________________________]
@ -335,9 +337,10 @@ std::vector<FileSegment::Range> FileCache::splitRange(size_t offset, size_t size
/// and get something like this:
///
/// [________________________]
/// ^ ^
/// right_offset right_offset + max_file_segment_size
/// e.g. there is no need to create sub-segment for range (right_offset + max_file_segment_size, aligned_right_offset].
/// ^ ^
/// | last_file_segment_right_offset + max_file_segment_size
/// last_file_segment_right_offset
/// e.g. there is no need to create sub-segment for range (last_file_segment_right_offset + max_file_segment_size, aligned_right_offset].
/// Because its left offset would be bigger than right_offset.
/// Therefore, we set end_pos_non_included as offset+size, but remaining_size as aligned_size.
@ -557,7 +560,7 @@ FileCache::getOrSet(
FileSegment::Range initial_range(offset, offset + size - 1);
/// result_range is initial range, which will be adjusted according to
/// 1. aligned offset, alighed_end_offset
/// 1. aligned_offset, aligned_end_offset
/// 2. max_file_segments_limit
FileSegment::Range result_range = initial_range;
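The diagrams above describe how a requested byte range is carved into file segments of at most `max_file_segment_size` bytes. A standalone C++ sketch of that splitting idea (assumed and simplified; not the actual `FileCache::splitRange()` implementation, which additionally handles offset alignment and the file-segment limit):

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Split [offset, offset + size) into consecutive inclusive ranges,
    // each covering at most max_file_segment_size bytes.
    std::vector<std::pair<size_t, size_t>> splitIntoSegments(size_t offset, size_t size, size_t max_file_segment_size)
    {
        std::vector<std::pair<size_t, size_t>> ranges;
        size_t pos = offset;
        const size_t end = offset + size; // non-inclusive
        while (pos < end)
        {
            const size_t segment_size = std::min(max_file_segment_size, end - pos);
            ranges.emplace_back(pos, pos + segment_size - 1); // {left, right}, both inclusive
            pos += segment_size;
        }
        return ranges;
    }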


@ -102,7 +102,7 @@ public:
///
/// @param allow_duplicates_in_input - actions are allowed to have
/// duplicated input (that will refer into the block). This is needed for
/// preliminary query filtering (filterBlockWithDAG()), because they just
/// preliminary query filtering (filterBlockWithExpression()), because they just
/// pass available virtual columns, which cannot be moved in case they are
/// used multiple times.
void execute(Block & block, size_t & num_rows, bool dry_run = false, bool allow_duplicates_in_input = false) const;


@ -90,9 +90,18 @@ private:
using FutureSetFromTuplePtr = std::shared_ptr<FutureSetFromTuple>;
/// Set from subquery can be built inplace for PK or in CreatingSet step.
/// If use_index_for_in_with_subqueries_max_values is reached, set for PK won't be created,
/// but ordinary set would be created instead.
/// Set from subquery can be filled (by running the subquery) in one of two ways:
/// 1. During query analysis. Specifically, inside `SourceStepWithFilter::applyFilters()`.
/// Useful if the query plan depends on the set contents, e.g. to determine which files to read.
/// 2. During query execution. This is the preferred way.
/// Sets are created by CreatingSetStep, which runs before other steps.
/// Be careful: to build the set during query analysis, the `buildSetInplace()` call must happen
/// inside `SourceStepWithFilter::applyFilters()`. Calling it later, e.g. from `initializePipeline()`
/// will result in LOGICAL_ERROR "Not-ready Set is passed" (because a CreatingSetStep was already
/// added to pipeline but hasn't executed yet).
///
/// If use_index_for_in_with_subqueries_max_values is reached, the built set won't be suitable for
/// key analysis, but will work with function IN (the set will contain only hashes of elements).
class FutureSetFromSubquery final : public FutureSet
{
public:
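The new comment boils down to an ordering constraint on when a set from a subquery may be built. A hypothetical C++ skeleton of a reading step that respects it (class and method names follow the comment, but the signatures are assumptions, not the real ClickHouse API):

    // Sketch only: where building the set is and is not allowed.
    class HypotheticalReadStep /* : public SourceStepWithFilter */
    {
    public:
        void applyFilters()
        {
            // OK: query analysis is still running, so a set from a subquery can
            // be built here (what the comment calls buildSetInplace()) and used,
            // e.g. to decide which files to read.
        }

        void initializePipeline()
        {
            // Too late: a CreatingSetStep has already been added to the pipeline
            // but has not run yet, so requesting the set contents here fails with
            // LOGICAL_ERROR "Not-ready Set is passed".
        }
    };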


@ -690,6 +690,12 @@ void validateAnalyzerSettings(ASTPtr ast, bool context_value)
if (top_level != value->safeGet<bool>())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'allow_experimental_analyzer' is changed in the subquery. Top level value: {}", top_level);
}
if (auto * value = set_query->changes.tryGet("enable_analyzer"))
{
if (top_level != value->safeGet<bool>())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'enable_analyzer' is changed in the subquery. Top level value: {}", top_level);
}
}
for (auto child : node->children)


@ -323,9 +323,7 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
if (children.size() > 1)
settings.ostr << "(";
settings.ostr << "(";
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
@ -333,9 +331,7 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
(*it)->formatImpl(settings, state, frame);
}
if (children.size() > 1)
settings.ostr << ")";
settings.ostr << ")";
}
void ASTColumnsReplaceTransformer::appendColumnName(WriteBuffer & ostr) const


@ -1099,7 +1099,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(
auto query_tree = subquery->detachQueryTree();
auto subquery_options = select_query_options.subquery();
/// I don't know if this is a good decision,
/// But for now it is done in the same way as in old analyzer.
/// but for now it is done in the same way as in old analyzer.
/// This would not ignore limits for subqueries (affects mutations only).
/// See test_build_sets_from_multiple_threads-analyzer.
subquery_options.ignore_limits = false;


@ -46,15 +46,6 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat(
{
const auto & header = getPort().getHeader();
name_map = header.getNamesToIndexesMap();
if (format_settings_.json.case_insensitive_column_matching)
{
for (auto & it : name_map)
{
StringRef key = it.first;
String lower_case_key = transformFieldNameToLowerCase(key);
lower_case_name_map[lower_case_key] = key;
}
}
if (format_settings_.import_nested_json)
{
for (size_t i = 0; i != header.columns(); ++i)
@ -180,15 +171,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns)
skipUnknownField(name_ref);
continue;
}
size_t column_index = 0;
if (format_settings.json.case_insensitive_column_matching)
{
String lower_case_name = transformFieldNameToLowerCase(name_ref);
StringRef field_name_ref = lower_case_name_map[lower_case_name];
column_index = columnIndex(field_name_ref, key_index);
}
else
column_index = columnIndex(name_ref, key_index);
const size_t column_index = columnIndex(name_ref, key_index);
if (unlikely(ssize_t(column_index) < 0))
{


@ -55,13 +55,7 @@ private:
virtual void readRowStart(MutableColumns &) {}
virtual void skipRowStart() {}
String transformFieldNameToLowerCase(const StringRef & field_name)
{
String field_name_str = field_name.toString();
std::transform(field_name_str.begin(), field_name_str.end(), field_name_str.begin(),
[](unsigned char c) { return std::tolower(c); });
return field_name_str;
}
/// Buffer for the read from the stream field name. Used when you have to copy it.
/// Also, if processing of Nested data is in progress, it holds the common prefix
/// of the nested column names (so that appending the field name to it produces
@ -80,8 +74,7 @@ private:
/// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map.
Block::NameMap name_map;
/// Hash table match `lower_case field name -> field name in the block`.
std::unordered_map<String, StringRef> lower_case_name_map;
/// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
std::vector<Block::NameMap::const_iterator> prev_positions;


@ -119,8 +119,10 @@ bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs)
namespace ProfileEvents
{
extern const Event SelectedParts;
extern const Event SelectedPartsTotal;
extern const Event SelectedRanges;
extern const Event SelectedMarks;
extern const Event SelectedMarksTotal;
extern const Event SelectQueriesWithPrimaryKeyUsage;
}
@ -1970,8 +1972,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
}
ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts);
ProfileEvents::increment(ProfileEvents::SelectedPartsTotal, result.total_parts);
ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges);
ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks);
ProfileEvents::increment(ProfileEvents::SelectedMarksTotal, result.total_marks_pk);
auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result, context);


@ -1,4 +1,5 @@
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
namespace DB
{
@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
Columns IMergeTreeDataPartWriter::releaseIndexColumns()
{
return Columns(
std::make_move_iterator(index_columns.begin()),
std::make_move_iterator(index_columns.end()));
/// The memory for index was allocated without thread memory tracker.
/// We need to deallocate it in shrinkToFit without memory tracker as well.
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
Columns result;
result.reserve(index_columns.size());
for (auto & column : index_columns)
{
column->shrinkToFit();
result.push_back(std::move(column));
}
index_columns.clear();
return result;
}
SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const
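The comment in `releaseIndexColumns()` above relies on a simple rule: memory allocated with the thread memory tracker blocked must also be freed with it blocked. A generic sketch of such a scoped, thread-local blocker (illustrative only; not the actual `MemoryTrackerBlockerInThread` implementation):

    // Disables per-thread accounting for the current scope and restores the
    // previous state on destruction, so nested blockers compose correctly.
    struct ScopedMemoryTrackerBlocker
    {
        static thread_local bool blocked;
        bool previous;
        ScopedMemoryTrackerBlocker() : previous(blocked) { blocked = true; }
        ~ScopedMemoryTrackerBlocker() { blocked = previous; }
    };
    thread_local bool ScopedMemoryTrackerBlocker::blocked = false;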


@ -566,6 +566,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
}
const ActionsDAG::Node * res = nullptr;
bool handled_inversion = false;
switch (node.type)
{
@ -582,7 +583,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
/// Re-generate column name for constant.
/// DAG form query (with enabled analyzer) uses suffixes for constants, like 1_UInt8.
/// DAG from PK does not use it. This breaks matching by column name sometimes.
/// Ideally, we should not compare manes, but DAG subtrees instead.
/// Ideally, we should not compare names, but DAG subtrees instead.
name = ASTLiteral(column_const->getDataColumn()[0]).getColumnName();
else
name = node.result_name;
@ -593,9 +594,9 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
case (ActionsDAG::ActionType::ALIAS):
{
/// Ignore aliases
const auto & alias = cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, need_inversion);
to_inverted[&node] = &alias;
return alias;
res = &cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, need_inversion);
handled_inversion = true;
break;
}
case (ActionsDAG::ActionType::ARRAY_JOIN):
{
@ -608,20 +609,10 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
auto name = node.function_base->getName();
if (name == "not")
{
const auto & arg = cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, !need_inversion);
to_inverted[&node] = &arg;
return arg;
res = &cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, !need_inversion);
handled_inversion = true;
}
if (name == "materialize")
{
/// Ignore materialize
const auto & arg = cloneASTWithInversionPushDown(*node.children.front(), inverted_dag, to_inverted, context, need_inversion);
to_inverted[&node] = &arg;
return arg;
}
if (name == "indexHint")
else if (name == "indexHint")
{
ActionsDAG::NodeRawConstPtrs children;
if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node.function_base.get()))
@ -636,12 +627,10 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
}
}
const auto & func = inverted_dag.addFunction(node.function_base, children, "");
to_inverted[&node] = &func;
return func;
res = &inverted_dag.addFunction(node.function_base, children, "");
handled_inversion = true;
}
if (need_inversion && (name == "and" || name == "or"))
else if (need_inversion && (name == "and" || name == "or"))
{
ActionsDAG::NodeRawConstPtrs children(node.children);
@ -659,32 +648,56 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
/// We match columns by name, so it is important to fill name correctly.
/// So, use empty string to make it automatically.
const auto & func = inverted_dag.addFunction(function_builder, children, "");
to_inverted[&node] = &func;
return func;
res = &inverted_dag.addFunction(function_builder, children, "");
handled_inversion = true;
}
ActionsDAG::NodeRawConstPtrs children(node.children);
for (auto & arg : children)
arg = &cloneASTWithInversionPushDown(*arg, inverted_dag, to_inverted, context, false);
auto it = inverse_relations.find(name);
if (it != inverse_relations.end())
else
{
const auto & func_name = need_inversion ? it->second : it->first;
auto function_builder = FunctionFactory::instance().get(func_name, context);
const auto & func = inverted_dag.addFunction(function_builder, children, "");
to_inverted[&node] = &func;
return func;
}
ActionsDAG::NodeRawConstPtrs children(node.children);
res = &inverted_dag.addFunction(node.function_base, children, "");
chassert(res->result_type == node.result_type);
for (auto & arg : children)
arg = &cloneASTWithInversionPushDown(*arg, inverted_dag, to_inverted, context, false);
auto it = inverse_relations.find(name);
if (it != inverse_relations.end())
{
const auto & func_name = need_inversion ? it->second : it->first;
auto function_builder = FunctionFactory::instance().get(func_name, context);
res = &inverted_dag.addFunction(function_builder, children, "");
handled_inversion = true;
}
else
{
/// Argument types could change slightly because of our transformations, e.g.
/// LowCardinality can be added because some subexpressions became constant
/// (in particular, sets). If that happens, re-run function overload resolver.
/// Otherwise don't re-run it because some functions may not be available
/// through FunctionFactory::get(), e.g. FunctionCapture.
bool types_changed = false;
for (size_t i = 0; i < children.size(); ++i)
{
if (!node.children[i]->result_type->equals(*children[i]->result_type))
{
types_changed = true;
break;
}
}
if (types_changed)
{
auto function_builder = FunctionFactory::instance().get(name, context);
res = &inverted_dag.addFunction(function_builder, children, "");
}
else
{
res = &inverted_dag.addFunction(node.function_base, children, "");
}
}
}
}
}
if (need_inversion)
if (!handled_inversion && need_inversion)
res = &inverted_dag.addFunction(FunctionFactory::instance().get("not", context), {res}, "");
to_inverted[&node] = res;
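For orientation: the refactoring above replaces early returns with a handled_inversion flag, but the underlying idea stays the classic De Morgan push-down. A pending NOT is absorbed by swapping a comparison for its inverse via inverse_relations, pushed into the children of and/or, and only materialized as an explicit not() when nothing handled it. A toy Python sketch of that idea on a tuple-based expression tree (the node shape and the tiny inverse table are illustrative only, not ClickHouse's ActionsDAG):

# Toy inversion push-down over ("func"|"col", name, children) tuples.
inverse_relations = {
    "less": "greaterOrEquals", "greaterOrEquals": "less",
    "equals": "notEquals", "notEquals": "equals",
}

def push_down_not(node, need_inversion=False):
    kind, name, children = node
    if kind != "func":
        # Leaf (column/constant): wrap only if an inversion is still pending.
        return ("func", "not", [node]) if need_inversion else node
    if name == "not":
        # Absorb the not(): invert its single child instead.
        return push_down_not(children[0], not need_inversion)
    if need_inversion and name in ("and", "or"):
        # De Morgan: flip the connective and push the inversion into every child.
        flipped = "or" if name == "and" else "and"
        return ("func", flipped, [push_down_not(c, True) for c in children])
    if need_inversion and name in inverse_relations:
        # Swap the comparison for its inverse; the inversion is handled here.
        return ("func", inverse_relations[name], [push_down_not(c) for c in children])
    res = ("func", name, [push_down_not(c) for c in children])
    # Nothing handled the inversion: fall back to an explicit not() on top.
    return ("func", "not", [res]) if need_inversion else res

a_lt_b = ("func", "less", [("col", "a", ()), ("col", "b", ())])
c_eq_d = ("func", "equals", [("col", "c", ()), ("col", "d", ())])
print(push_down_not(("func", "not", [("func", "and", [a_lt_b, c_eq_d])])))
# NOT(a < b AND c = d)  ->  (a >= b) OR (c != d)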

View File

@ -6,6 +6,8 @@
#include <Core/Range.h>
#include <Core/PlainRanges.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <Parsers/ASTExpressionList.h>
#include <Interpreters/Set.h>
@ -14,7 +16,6 @@
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/RPNBuilder.h>
#include "DataTypes/Serializations/ISerialization.h"
namespace DB

View File

@ -1146,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false);
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr);
if (!filter_dag)
return {};

View File

@ -255,6 +255,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex()
index_compressor_stream = std::make_unique<CompressedWriteBuffer>(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size);
index_source_hashing_stream = std::make_unique<HashingWriteBuffer>(*index_compressor_stream);
}
const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types;
index_serializations.reserve(primary_key_types.size());
for (const auto & type : primary_key_types)
index_serializations.push_back(type->getDefaultSerialization());
}
}
@ -300,22 +306,30 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
store = std::make_shared<GinIndexStore>(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment);
gin_index_stores[stream_name] = store;
}
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings));
skip_index_accumulated_marks.push_back(0);
}
}
void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row)
{
chassert(index_block.columns() == index_serializations.size());
auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream;
for (size_t i = 0; i < index_block.columns(); ++i)
{
const auto & column = index_block.getByPosition(i).column;
index_columns[i]->insertFrom(*column, row);
index_serializations[i]->serializeBinary(*column, row, index_stream, {});
}
}
void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write)
{
size_t primary_columns_num = primary_index_block.columns();
if (index_columns.empty())
{
index_types = primary_index_block.getDataTypes();
index_columns.resize(primary_columns_num);
last_block_index_columns.resize(primary_columns_num);
for (size_t i = 0; i < primary_columns_num; ++i)
index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty();
}
if (!metadata_snapshot->hasPrimaryKey())
return;
{
/** While filling index (index_columns), disable memory tracker.
@ -326,25 +340,20 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc
*/
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
if (index_columns.empty())
index_columns = primary_index_block.cloneEmptyColumns();
/// Write index. The index contains Primary Key value for each `index_granularity` row.
for (const auto & granule : granules_to_write)
{
if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start)
{
for (size_t j = 0; j < primary_columns_num; ++j)
{
const auto & primary_column = primary_index_block.getByPosition(j);
index_columns[j]->insertFrom(*primary_column.column, granule.start_row);
primary_column.type->getDefaultSerialization()->serializeBinary(
*primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {});
}
}
if (granule.mark_on_start)
calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row);
}
}
/// store last index row to write final mark at the end of column
for (size_t j = 0; j < primary_columns_num; ++j)
last_block_index_columns[j] = primary_index_block.getByPosition(j).column;
/// Store block with last index row to write final mark at the end of column
if (with_final_mark)
last_index_block = primary_index_block;
}
void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block)
@ -421,19 +430,14 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat
if (index_file_hashing_stream)
{
if (write_final_mark)
if (write_final_mark && last_index_block)
{
for (size_t j = 0; j < index_columns.size(); ++j)
{
const auto & column = *last_block_index_columns[j];
size_t last_row_number = column.size() - 1;
index_columns[j]->insertFrom(column, last_row_number);
index_types[j]->getDefaultSerialization()->serializeBinary(
column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {});
}
last_block_index_columns.clear();
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1);
}
last_index_block.clear();
if (compress_primary_key)
{
index_source_hashing_stream->finalize();

View File

@ -173,10 +173,10 @@ protected:
std::unique_ptr<HashingWriteBuffer> index_source_hashing_stream;
bool compress_primary_key;
DataTypes index_types;
/// Index columns from the last block
/// It's written to index file in the `writeSuffixAndFinalizePart` method
Columns last_block_index_columns;
/// Last block with index columns.
/// It's written to index file in the `writeSuffixAndFinalizePart` method.
Block last_index_block;
Serializations index_serializations;
bool data_written = false;
@ -193,6 +193,7 @@ private:
void initStatistics();
virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0;
void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row);
struct ExecutionStatistics
{

View File

@ -36,10 +36,12 @@
#include <base/sort.h>
#include <rocksdb/advanced_options.h>
#include <rocksdb/compression_type.h>
#include <rocksdb/convenience.h>
#include <rocksdb/env.h>
#include <rocksdb/options.h>
#include <rocksdb/statistics.h>
#include <rocksdb/table.h>
#include <rocksdb/convenience.h>
#include <rocksdb/utilities/db_ttl.h>
#include <cstddef>
@ -428,6 +430,7 @@ void StorageEmbeddedRocksDB::initDB()
rocksdb::Options base;
base.create_if_missing = true;
base.compression = rocksdb::CompressionType::kZSTD;
base.statistics = rocksdb::CreateDBStatistics();
/// It is too verbose by default, and in fact we don't care about rocksdb logs at all.
base.info_log_level = rocksdb::ERROR_LEVEL;
@ -439,7 +442,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has("rocksdb.options"))
{
auto config_options = getOptionsFromConfig(config, "rocksdb.options");
status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged);
status = rocksdb::GetDBOptionsFromMap({}, merged, config_options, &merged);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' at: {}: {}",
@ -449,7 +452,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has("rocksdb.column_family_options"))
{
auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options");
status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged);
status = rocksdb::GetColumnFamilyOptionsFromMap({}, merged, column_family_options, &merged);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}: {}",
@ -459,7 +462,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has("rocksdb.block_based_table_options"))
{
auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options");
status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options);
status = rocksdb::GetBlockBasedTableOptionsFromMap({}, table_options, block_based_table_options, &table_options);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}: {}",
@ -484,7 +487,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has(config_key))
{
auto table_config_options = getOptionsFromConfig(config, config_key);
status = rocksdb::GetDBOptionsFromMap(merged, table_config_options, &merged);
status = rocksdb::GetDBOptionsFromMap({}, merged, table_config_options, &merged);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}",
@ -496,7 +499,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has(config_key))
{
auto table_column_family_options = getOptionsFromConfig(config, config_key);
status = rocksdb::GetColumnFamilyOptionsFromMap(merged, table_column_family_options, &merged);
status = rocksdb::GetColumnFamilyOptionsFromMap({}, merged, table_column_family_options, &merged);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}",
@ -508,7 +511,7 @@ void StorageEmbeddedRocksDB::initDB()
if (config.has(config_key))
{
auto block_based_table_options = getOptionsFromConfig(config, config_key);
status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options);
status = rocksdb::GetBlockBasedTableOptionsFromMap({}, table_options, block_based_table_options, &table_options);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from '{}' at: {}: {}",

View File

@ -41,6 +41,14 @@ ColumnsDescription StorageSystemRocksDB::getColumnsDescription()
}
Block StorageSystemRocksDB::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{
const auto access = context->getAccess();

View File

@ -22,6 +22,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -275,7 +275,7 @@ public:
private:
std::shared_ptr<StorageMergeTreeIndex> storage;
Poco::Logger * log;
const ActionsDAG::Node * predicate = nullptr;
ExpressionActionsPtr virtual_columns_filter;
};
void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes)
@ -283,7 +283,16 @@ void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes)
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0);
{
Block block_to_filter
{
{ {}, std::make_shared<DataTypeString>(), StorageMergeTreeIndex::part_name_column.name },
};
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
if (dag)
virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
}
}
void StorageMergeTreeIndex::read(
@ -335,7 +344,7 @@ void StorageMergeTreeIndex::read(
void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{
auto filtered_parts = storage->getFilteredDataParts(predicate, context);
auto filtered_parts = storage->getFilteredDataParts(virtual_columns_filter);
LOG_DEBUG(log, "Reading index{}from {} parts of table {}",
storage->with_marks ? " with marks " : " ",
@ -345,9 +354,9 @@ void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline,
pipeline.init(Pipe(std::make_shared<MergeTreeIndexSource>(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks)));
}
MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const
MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ExpressionActionsPtr & virtual_columns_filter) const
{
if (!predicate)
if (!virtual_columns_filter)
return data_parts;
auto all_part_names = ColumnString::create();
@ -355,7 +364,7 @@ MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const
all_part_names->insert(part->name);
Block filtered_block{{std::move(all_part_names), std::make_shared<DataTypeString>(), part_name_column.name}};
VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context);
VirtualColumnUtils::filterBlockWithExpression(virtual_columns_filter, filtered_block);
if (!filtered_block.rows())
return {};

View File

@ -36,7 +36,7 @@ public:
private:
friend class ReadFromMergeTreeIndex;
MergeTreeData::DataPartsVector getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const;
MergeTreeData::DataPartsVector getFilteredDataParts(const ExpressionActionsPtr & virtual_columns_filter) const;
StoragePtr source_table;
bool with_marks;

View File

@ -5,6 +5,7 @@
// #include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/System/getQueriedColumnsMaskAndHeader.h>
#include <Storages/VirtualColumnUtils.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/SourceStepWithFilter.h>
@ -44,7 +45,7 @@ public:
private:
std::shared_ptr<IStorageSystemOneBlock> storage;
std::vector<UInt8> columns_mask;
const ActionsDAG::Node * predicate = nullptr;
std::optional<ActionsDAG> filter;
};
void IStorageSystemOneBlock::read(
@ -79,8 +80,9 @@ void IStorageSystemOneBlock::read(
void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{
const auto & sample_block = getOutputStream().header;
const Block & sample_block = getOutputStream().header;
MutableColumns res_columns = sample_block.cloneEmptyColumns();
const ActionsDAG::Node * predicate = filter ? filter->getOutputs().at(0) : nullptr;
storage->fillData(res_columns, context, predicate, std::move(columns_mask));
UInt64 num_rows = res_columns.at(0)->size();
@ -93,8 +95,18 @@ void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes)
{
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0);
if (!filter_actions_dag)
return;
Block sample = storage->getFilterSampleBlock();
if (sample.columns() == 0)
return;
filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &sample);
/// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery.
if (filter)
VirtualColumnUtils::buildSetsForDAG(*filter, context);
}
}

View File

@ -22,8 +22,16 @@ class Context;
class IStorageSystemOneBlock : public IStorage
{
protected:
/// If this method uses `predicate`, getFilterSampleBlock() must list all columns to which
/// it's applied. (Otherwise there'll be a LOGICAL_ERROR "Not-ready Set is passed" on subqueries.)
virtual void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const = 0;
/// Columns to which fillData() applies the `predicate`.
virtual Block getFilterSampleBlock() const
{
return {};
}
virtual bool supportsColumnsMask() const { return false; }
friend class ReadFromSystemOneBlock;

View File

@ -338,7 +338,7 @@ private:
std::shared_ptr<StorageSystemColumns> storage;
std::vector<UInt8> columns_mask;
const size_t max_block_size;
const ActionsDAG::Node * predicate = nullptr;
std::optional<ActionsDAG> virtual_columns_filter;
};
void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes)
@ -346,7 +346,17 @@ void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes)
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0);
{
Block block_to_filter;
block_to_filter.insert(ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "database"));
block_to_filter.insert(ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "table"));
virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
/// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery.
if (virtual_columns_filter)
VirtualColumnUtils::buildSetsForDAG(*virtual_columns_filter, context);
}
}
void StorageSystemColumns::read(
@ -408,7 +418,8 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline,
block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared<DataTypeString>(), "database"));
/// Filter block with `database` column.
VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context);
if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithPredicate(virtual_columns_filter->getOutputs().at(0), block_to_filter, context);
if (!block_to_filter.rows())
{
@ -456,7 +467,8 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline,
}
/// Filter block with `database` and `table` columns.
VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context);
if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithPredicate(virtual_columns_filter->getOutputs().at(0), block_to_filter, context);
if (!block_to_filter.rows())
{

View File

@ -214,7 +214,7 @@ private:
std::shared_ptr<StorageSystemDataSkippingIndices> storage;
std::vector<UInt8> columns_mask;
const size_t max_block_size;
const ActionsDAG::Node * predicate = nullptr;
ExpressionActionsPtr virtual_columns_filter;
};
void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes)
@ -222,7 +222,16 @@ void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0);
{
Block block_to_filter
{
{ ColumnString::create(), std::make_shared<DataTypeString>(), "database" },
};
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
if (dag)
virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
}
}
void StorageSystemDataSkippingIndices::read(
@ -268,7 +277,8 @@ void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder
/// Condition on "database" in a query acts like an index.
Block block { ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "database") };
VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context);
if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithExpression(virtual_columns_filter, block);
ColumnPtr & filtered_databases = block.getByPosition(0).column;
pipeline.init(Pipe(std::make_shared<DataSkippingIndicesSource>(

View File

@ -73,6 +73,14 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database
return engine_full;
}
Block StorageSystemDatabases::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "engine" },
{ {}, std::make_shared<DataTypeUUID>(), "uuid" },
};
}
static ColumnPtr getFilteredDatabases(const Databases & databases, const ActionsDAG::Node * predicate, ContextPtr context)
{
MutableColumnPtr name_column = ColumnString::create();

View File

@ -27,6 +27,7 @@ protected:
bool supportsColumnsMask() const override { return true; }
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8> columns_mask) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -107,6 +107,13 @@ ColumnsDescription StorageSystemDistributionQueue::getColumnsDescription()
};
}
Block StorageSystemDistributionQueue::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{

View File

@ -22,6 +22,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -46,6 +46,13 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription()
};
}
Block StorageSystemMutations::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{

View File

@ -22,6 +22,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -43,6 +43,14 @@ ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription()
}
Block StorageSystemPartMovesBetweenShards::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{
const auto access = context->getAccess();

View File

@ -20,6 +20,7 @@ protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -285,7 +285,7 @@ private:
const bool with_zk_fields;
const size_t max_block_size;
std::shared_ptr<StorageSystemReplicasImpl> impl;
const ActionsDAG::Node * predicate = nullptr;
ExpressionActionsPtr virtual_columns_filter;
};
void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes)
@ -293,7 +293,18 @@ void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes)
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
if (filter_actions_dag)
predicate = filter_actions_dag->getOutputs().at(0);
{
Block block_to_filter
{
{ ColumnString::create(), std::make_shared<DataTypeString>(), "database" },
{ ColumnString::create(), std::make_shared<DataTypeString>(), "table" },
{ ColumnString::create(), std::make_shared<DataTypeString>(), "engine" },
};
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter);
if (dag)
virtual_columns_filter = VirtualColumnUtils::buildFilterExpression(std::move(*dag), context);
}
}
void StorageSystemReplicas::read(
@ -430,7 +441,8 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline,
{ col_engine, std::make_shared<DataTypeString>(), "engine" },
};
VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context);
if (virtual_columns_filter)
VirtualColumnUtils::filterBlockWithExpression(virtual_columns_filter, filtered_block);
if (!filtered_block.rows())
{

View File

@ -62,6 +62,14 @@ ColumnsDescription StorageSystemReplicationQueue::getColumnsDescription()
}
Block StorageSystemReplicationQueue::getFilterSampleBlock() const
{
return {
{ {}, std::make_shared<DataTypeString>(), "database" },
{ {}, std::make_shared<DataTypeString>(), "table" },
};
}
void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const
{
const auto access = context->getAccess();

View File

@ -21,6 +21,7 @@ public:
protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override;
Block getFilterSampleBlock() const override;
};
}

View File

@ -275,8 +275,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node)
static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
const ActionsDAG::Node * node,
const Block * allowed_inputs,
ActionsDAG::Nodes & additional_nodes,
bool allow_non_deterministic_functions)
ActionsDAG::Nodes & additional_nodes)
{
if (node->type == ActionsDAG::ActionType::FUNCTION)
{
@ -285,14 +284,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto & node_copy = additional_nodes.emplace_back(*node);
node_copy.children.clear();
for (const auto * child : node->children)
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions))
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes))
node_copy.children.push_back(child_copy);
/// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for
/// trivial count optimization, otherwise we can get incorrect results. For example, if the query is
/// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply
/// trivial count.
else if (!allow_non_deterministic_functions)
return nullptr;
if (node_copy.children.empty())
return nullptr;
@ -318,7 +311,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
{
auto & node_copy = additional_nodes.emplace_back(*node);
for (auto & child : node_copy.children)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child)
return nullptr;
return &node_copy;
@ -332,7 +325,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto index_hint_dag = index_hint->getActions().clone();
ActionsDAG::NodeRawConstPtrs atoms;
for (const auto & output : index_hint_dag.getOutputs())
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions))
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
atoms.push_back(child_copy);
if (!atoms.empty())
@ -366,13 +359,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
return node;
}
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions)
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
{
if (!predicate)
return {};
ActionsDAG::Nodes additional_nodes;
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions);
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes);
if (!res)
return {};
@ -381,7 +374,7 @@ std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
{
auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false);
auto dag = splitFilterDagForAllowedInputs(predicate, &block);
if (dag)
filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block);
}

View File

@ -18,8 +18,16 @@ class NamesAndTypesList;
namespace VirtualColumnUtils
{
/// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate.
/// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs).
/// The filtering functions are tricky to use correctly.
/// There are 2 ways:
/// 1. Call filterBlockWithPredicate() or filterBlockWithExpression() inside SourceStepWithFilter::applyFilters().
/// 2. Call splitFilterDagForAllowedInputs() and buildSetsForDAG() inside SourceStepWithFilter::applyFilters().
/// Then call filterBlockWithPredicate() or filterBlockWithExpression() in initializePipeline().
///
/// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed"
/// if there are subqueries.
/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context);
/// Just filters block. Block should contain all the required columns.
@ -33,15 +41,7 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node);
/// Extract a part of predicate that can be evaluated using only columns from input_names.
/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic
/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr.
/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in
/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is
/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1`
/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is
/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial
/// count optimization will be mistakenly applied to the query.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true);
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
/// Extract from the input stream a set of `name` column values
template <typename T>

View File

@ -8,7 +8,6 @@ from pathlib import Path
from typing import List, Sequence, Tuple
from ci_config import CI
from ci_utils import normalize_string
from env_helper import TEMP_PATH
from functional_test_check import NO_CHANGES_MSG
from report import (
@ -142,7 +141,9 @@ def main():
for file in set(jr.additional_files):
file_ = Path(file)
file_name = file_.name
file_name = file_name.replace(".", "__" + normalize_string(job_id) + ".", 1)
file_name = file_name.replace(
".", "__" + CI.Utils.normalize_string(job_id) + ".", 1
)
file_ = file_.rename(file_.parent / file_name)
additional_files.append(file_)

View File

@ -16,7 +16,7 @@ import upload_result_helper
from build_check import get_release_or_pr
from ci_config import CI
from ci_metadata import CiMetadata
from ci_utils import GH, normalize_string, Utils
from ci_utils import GH, Utils
from clickhouse_helper import (
CiLogsCredentials,
ClickHouseHelper,
@ -296,7 +296,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
# do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while
# testing), otherwise reports won't be found
if not (pr_info.is_scheduled or pr_info.is_dispatched):
report_prefix = normalize_string(pr_info.head_ref)
report_prefix = Utils.normalize_string(pr_info.head_ref)
print(
f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]"
)
@ -718,7 +718,7 @@ def _upload_build_artifacts(
(
get_release_or_pr(pr_info, get_version_from_repo())[1],
pr_info.sha,
normalize_string(build_name),
Utils.normalize_string(build_name),
"performance.tar.zst",
)
)
@ -1250,7 +1250,7 @@ def main() -> int:
(
get_release_or_pr(pr_info, get_version_from_repo())[0],
pr_info.sha,
normalize_string(
Utils.normalize_string(
job_report.check_name or _get_ext_check_name(args.job_name)
),
)

View File

@ -7,7 +7,7 @@ from typing import Dict, Optional, Any, Union, Sequence, List, Set
from ci_config import CI
from ci_utils import is_hex, GH
from ci_utils import Utils, GH
from commit_status_helper import CommitStatusData
from env_helper import (
TEMP_PATH,
@ -240,7 +240,7 @@ class CiCache:
int(job_properties[-1]),
)
if not is_hex(job_digest):
if not Utils.is_hex(job_digest):
print("ERROR: wrong record job digest")
return None

View File

@ -3,7 +3,7 @@ import re
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from typing import Dict, Optional, List
from ci_utils import normalize_string
from ci_utils import Utils
from ci_definitions import *
@ -13,7 +13,6 @@ class CI:
each config item in the below dicts should be an instance of JobConfig class or inherited from it
"""
MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5
MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2
# reimport types to CI class so that they visible as CI.* and mypy is happy
@ -21,12 +20,10 @@ class CI:
from ci_definitions import BuildConfig as BuildConfig
from ci_definitions import DigestConfig as DigestConfig
from ci_definitions import JobConfig as JobConfig
from ci_definitions import CheckDescription as CheckDescription
from ci_definitions import Tags as Tags
from ci_definitions import JobNames as JobNames
from ci_definitions import BuildNames as BuildNames
from ci_definitions import StatusNames as StatusNames
from ci_definitions import CHECK_DESCRIPTIONS as CHECK_DESCRIPTIONS
from ci_definitions import REQUIRED_CHECKS as REQUIRED_CHECKS
from ci_definitions import SyncState as SyncState
from ci_definitions import MQ_JOBS as MQ_JOBS
@ -37,9 +34,7 @@ class CI:
from ci_utils import GH as GH
from ci_utils import Shell as Shell
from ci_definitions import Labels as Labels
from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
from ci_definitions import WorkFlowNames as WorkFlowNames
from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL
# Jobs that run for doc related updates
_DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK]
@ -558,7 +553,7 @@ class CI:
@classmethod
def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]:
for label, config in cls.TAG_CONFIGS.items():
if normalize_string(label_name) == normalize_string(label):
if Utils.normalize_string(label_name) == Utils.normalize_string(label):
return config
return None
@ -687,6 +682,34 @@ class CI:
assert res, f"not a build [{build_name}] or invalid JobConfig"
return res
@classmethod
def is_workflow_ok(cls) -> bool:
# TODO: temporary method to make Mergeable check working
res = cls.GH.get_workflow_results()
if not res:
print("ERROR: no workflow results found")
return False
for workflow_job, workflow_data in res.items():
status = workflow_data["result"]
if status in (
cls.GH.ActionStatuses.SUCCESS,
cls.GH.ActionStatuses.SKIPPED,
):
print(f"Workflow status for [{workflow_job}] is [{status}] - continue")
elif status in (cls.GH.ActionStatuses.FAILURE,):
if workflow_job in (
WorkflowStages.TESTS_2,
WorkflowStages.TESTS_2_WW,
):
print(
f"Failed Workflow status for [{workflow_job}], it's not required - continue"
)
continue
print(f"Failed Workflow status for [{workflow_job}]")
return False
return True
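A minimal standalone sketch of the decision logic in CI.is_workflow_ok() above, operating on a plain results dict: success and skipped jobs pass, and a failure is tolerated only for the optional Tests_2 stages. OPTIONAL_STAGES and the job names in the example stand in for WorkflowStages.TESTS_2 / TESTS_2_WW and real job names; the string values are placeholders.

def is_workflow_ok(results, optional_stages=("Tests_2", "Tests_2_ww")):
    if not results:
        print("ERROR: no workflow results found")
        return False
    for job, data in results.items():
        status = data["result"]
        if status in ("success", "skipped"):
            print(f"Workflow status for [{job}] is [{status}] - continue")
        elif status == "failure":
            if job in optional_stages:
                print(f"Failed Workflow status for [{job}], it's not required - continue")
                continue
            print(f"Failed Workflow status for [{job}]")
            return False
    return True

print(is_workflow_ok({"Builds_1": {"result": "success"},
                      "Tests_2": {"result": "failure"}}))   # True
print(is_workflow_ok({"Builds_1": {"result": "failure"}}))  # False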
if __name__ == "__main__":
parser = ArgumentParser(

View File

@ -1,7 +1,7 @@
import copy
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Union, Iterable, Optional, Literal, Any
from typing import List, Union, Iterable, Optional, Literal, Any
from ci_utils import WithIter
from integration_test_images import IMAGES
@ -32,28 +32,6 @@ class Labels:
AUTO_BACKPORT = {"pr-critical-bugfix"}
TRUSTED_CONTRIBUTORS = {
e.lower()
for e in [
"amosbird",
"azat", # SEMRush
"bharatnc", # Many contributions.
"cwurm", # ClickHouse, Inc
"den-crane", # Documentation contributor
"ildus", # adjust, ex-pgpro
"nvartolomei", # Seasoned contributor, CloudFlare
"taiyang-li",
"ucasFL", # Amos Bird's friend
"thomoco", # ClickHouse, Inc
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse, Inc
"tsolodov", # ClickHouse, Inc
"justindeguzman", # ClickHouse, Inc
"XuJia0210", # ClickHouse, Inc
]
}
class WorkflowStages(metaclass=WithIter):
"""
Stages of GitHUb actions workflow
@ -668,188 +646,3 @@ MQ_JOBS = [
BuildNames.BINARY_RELEASE,
JobNames.UNIT_TEST,
]
@dataclass
class CheckDescription:
name: str
description: str # the check descriptions, will be put into the status table
match_func: Callable[[str], bool] # the function to check vs the commit status
def __hash__(self) -> int:
return hash(self.name + self.description)
CHECK_DESCRIPTIONS = [
CheckDescription(
StatusNames.PR_CHECK,
"Checks correctness of the PR's body",
lambda x: x == "PR Check",
),
CheckDescription(
StatusNames.SYNC,
"If it fails, ask a maintainer for help",
lambda x: x == StatusNames.SYNC,
),
CheckDescription(
"AST fuzzer",
"Runs randomly generated queries to catch program errors. "
"The build type is optionally given in parenthesis. "
"If it fails, ask a maintainer for help",
lambda x: x.startswith("AST fuzzer"),
),
CheckDescription(
JobNames.BUGFIX_VALIDATE,
"Checks that either a new test (functional or integration) or there "
"some changed tests that fail with the binary built on master branch",
lambda x: x == JobNames.BUGFIX_VALIDATE,
),
CheckDescription(
StatusNames.CI,
"A meta-check that indicates the running CI. Normally, it's in <b>success</b> or "
"<b>pending</b> state. The failed status indicates some problems with the PR",
lambda x: x == "CI running",
),
CheckDescription(
"Builds",
"Builds ClickHouse in various configurations for use in further steps. "
"You have to fix the builds that fail. Build logs often has enough "
"information to fix the error, but you might have to reproduce the failure "
"locally. The <b>cmake</b> options can be found in the build log, grepping for "
'<b>cmake</b>. Use these options and follow the <a href="'
'https://clickhouse.com/docs/en/development/build">general build process</a>',
lambda x: x.startswith("ClickHouse") and x.endswith("build check"),
),
CheckDescription(
"Compatibility check",
"Checks that <b>clickhouse</b> binary runs on distributions with old libc "
"versions. If it fails, ask a maintainer for help",
lambda x: x.startswith("Compatibility check"),
),
CheckDescription(
JobNames.DOCKER_SERVER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker server"),
),
CheckDescription(
JobNames.DOCKER_KEEPER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker keeper"),
),
CheckDescription(
JobNames.DOCS_CHECK,
"Builds and tests the documentation",
lambda x: x == JobNames.DOCS_CHECK,
),
CheckDescription(
JobNames.FAST_TEST,
"Normally this is the first check that is ran for a PR. It builds ClickHouse "
'and runs most of <a href="https://clickhouse.com/docs/en/development/tests'
'#functional-tests">stateless functional tests</a>, '
"omitting some. If it fails, further checks are not started until it is fixed. "
"Look at the report to see which tests fail, then reproduce the failure "
'locally as described <a href="https://clickhouse.com/docs/en/development/'
'tests#functional-test-locally">here</a>',
lambda x: x == JobNames.FAST_TEST,
),
CheckDescription(
"Flaky tests",
"Checks if new added or modified tests are flaky by running them repeatedly, "
"in parallel, with more randomization. Functional tests are run 100 times "
"with address sanitizer, and additional randomization of thread scheduling. "
"Integration tests are run up to 10 times. If at least once a new test has "
"failed, or was too long, this check will be red. We don't allow flaky tests, "
'read <a href="https://clickhouse.com/blog/decorating-a-christmas-tree-with-'
'the-help-of-flaky-tests/">the doc</a>',
lambda x: "tests flaky check" in x,
),
CheckDescription(
"Install packages",
"Checks that the built packages are installable in a clear environment",
lambda x: x.startswith("Install packages ("),
),
CheckDescription(
"Integration tests",
"The integration tests report. In parenthesis the package type is given, "
"and in square brackets are the optional part/total tests",
lambda x: x.startswith("Integration tests ("),
),
CheckDescription(
StatusNames.MERGEABLE,
"Checks if all other necessary checks are successful",
lambda x: x == StatusNames.MERGEABLE,
),
CheckDescription(
"Performance Comparison",
"Measure changes in query performance. The performance test report is "
'described in detail <a href="https://github.com/ClickHouse/ClickHouse/tree'
'/master/docker/test/performance-comparison#how-to-read-the-report">here</a>. '
"In square brackets are the optional part/total tests",
lambda x: x.startswith("Performance Comparison"),
),
CheckDescription(
"Push to Dockerhub",
"The check for building and pushing the CI related docker images to docker hub",
lambda x: x.startswith("Push") and "to Dockerhub" in x,
),
CheckDescription(
"Sqllogic",
"Run clickhouse on the "
'<a href="https://www.sqlite.org/sqllogictest">sqllogic</a> '
"test set against sqlite and checks that all statements are passed",
lambda x: x.startswith("Sqllogic test"),
),
CheckDescription(
"SQLancer",
"Fuzzing tests that detect logical bugs with "
'<a href="https://github.com/sqlancer/sqlancer">SQLancer</a> tool',
lambda x: x.startswith("SQLancer"),
),
CheckDescription(
"Stateful tests",
"Runs stateful functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateful tests ("),
),
CheckDescription(
"Stateless tests",
"Runs stateless functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateless tests ("),
),
CheckDescription(
"Stress test",
"Runs stateless functional tests concurrently from several clients to detect "
"concurrency-related errors",
lambda x: x.startswith("Stress test ("),
),
CheckDescription(
JobNames.STYLE_CHECK,
"Runs a set of checks to keep the code style clean. If some of tests failed, "
"see the related log from the report",
lambda x: x == JobNames.STYLE_CHECK,
),
CheckDescription(
"Unit tests",
"Runs the unit tests for different release types",
lambda x: x.startswith("Unit tests ("),
),
CheckDescription(
"Upgrade check",
"Runs stress tests on server version from last release and then tries to "
"upgrade it to the version from the PR. It checks if the new server can "
"successfully startup without any errors, crashes or sanitizer asserts",
lambda x: x.startswith("Upgrade check ("),
),
CheckDescription(
"ClickBench",
"Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table",
lambda x: x.startswith("ClickBench"),
),
CheckDescription(
"Fallback for unknown",
"There's no description for the check yet, please add it to "
"tests/ci/ci_config.py:CHECK_DESCRIPTIONS",
lambda x: True,
),
]

View File

@ -2,7 +2,6 @@ import re
from dataclasses import dataclass, asdict
from typing import Optional, List, Dict, Any, Iterable
from ci_utils import normalize_string
from ci_config import CI
from git_helper import Runner as GitRunner, GIT_PREFIX
from pr_info import PRInfo
@ -89,14 +88,14 @@ class CiSettings:
if not res.include_keywords:
res.include_keywords = []
res.include_keywords.append(
normalize_string(match.removeprefix("ci_include_"))
CI.Utils.normalize_string(match.removeprefix("ci_include_"))
)
elif match.startswith("ci_exclude_"):
if not res.exclude_keywords:
res.exclude_keywords = []
keywords = match.removeprefix("ci_exclude_").split("|")
res.exclude_keywords += [
normalize_string(keyword) for keyword in keywords
CI.Utils.normalize_string(keyword) for keyword in keywords
]
elif match == CI.Tags.NO_CI_CACHE:
res.no_ci_cache = True
@ -163,7 +162,7 @@ class CiSettings:
# do not exclude builds
if self.exclude_keywords and not CI.is_build_job(job):
for keyword in self.exclude_keywords:
if keyword in normalize_string(job):
if keyword in CI.Utils.normalize_string(job):
print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny")
return False
@ -174,7 +173,7 @@ class CiSettings:
# never exclude Style Check by include keywords
return True
for keyword in self.include_keywords:
if keyword in normalize_string(job):
if keyword in CI.Utils.normalize_string(job):
print(f"Job [{job}] matches Include keyword [{keyword}] - pass")
return True
to_deny = True
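The include/exclude handling above boils down to normalizing both the keyword and the job name and then doing a substring check. A simplified standalone sketch of just that matching step (normalize() mirrors Utils.normalize_string from this commit; the build and Style Check exemptions of the real CiSettings are omitted):

# Simplified keyword filter: normalize both sides, then substring-match.
def normalize(s):
    res = s.lower()
    for old in (" ", "(", ")", ",", "/", "-"):
        res = res.replace(old, "_")
    return res

def job_allowed(job, include_keywords=None, exclude_keywords=None):
    name = normalize(job)
    if exclude_keywords and any(k in name for k in exclude_keywords):
        return False
    if include_keywords:
        return any(k in name for k in include_keywords)
    return True

print(job_allowed("Stateless tests (release)", include_keywords=["stateless"]))  # True
print(job_allowed("Integration tests (asan)", exclude_keywords=["asan"]))        # False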

View File

@ -6,7 +6,7 @@ import sys
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
from typing import Any, Iterator, List, Union, Optional, Sequence
import requests
@ -20,41 +20,6 @@ class Envs:
GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")
LABEL_CATEGORIES = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
"pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
}
CATEGORY_TO_LABEL = {
c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
}
class WithIter(type):
def __iter__(cls):
return (v for k, v in cls.__dict__.items() if not k.startswith("_"))
@ -70,21 +35,6 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
os.chdir(oldpwd)
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
def normalize_string(string: str) -> str:
res = string.lower()
for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_"), ("-", "_")):
res = res.replace(*r)
return res
class GH:
class ActionsNames:
RunConfig = "RunConfig"
@ -94,9 +44,10 @@ class GH:
FAILURE = "failure"
PENDING = "pending"
SUCCESS = "success"
SKIPPED = "skipped"
@classmethod
def _get_workflow_results(cls):
def get_workflow_results(cls):
if not Path(Envs.WORKFLOW_RESULT_FILE).exists():
print(
f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]"
@ -115,13 +66,13 @@ class GH:
@classmethod
def print_workflow_results(cls):
res = cls._get_workflow_results()
res = cls.get_workflow_results()
results = [f"{job}: {data['result']}" for job, data in res.items()]
cls.print_in_group("Workflow results", results)
@classmethod
def is_workflow_ok(cls) -> bool:
res = cls._get_workflow_results()
res = cls.get_workflow_results()
for _job, data in res.items():
if data["result"] == "failure":
return False
@ -129,7 +80,7 @@ class GH:
@classmethod
def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]:
res = cls._get_workflow_results()
res = cls.get_workflow_results()
if wf_job_name in res:
return res[wf_job_name]["result"] # type: ignore
else:
@ -149,8 +100,8 @@ class GH:
) -> str:
assert len(token) == 40
assert len(commit_sha) == 40
assert is_hex(commit_sha)
assert not is_hex(token)
assert Utils.is_hex(commit_sha)
assert not Utils.is_hex(token)
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}"
headers = {
"Authorization": f"token {token}",
@ -298,79 +249,23 @@ class Utils:
Shell.check("sudo dmesg --clear", verbose=True)
@staticmethod
def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [
True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)
]:
return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif "(changelog entry is not required)" in category:
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
@staticmethod
def normalize_string(string: str) -> str:
res = string.lower()
for r in (
(" ", "_"),
("(", "_"),
(")", "_"),
(",", "_"),
("/", "_"),
("-", "_"),
):
res = res.replace(*r)
return res
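The other helper consolidated into Utils by this commit, is_hex(), is just an int(s, 16) probe; the GH helpers above use it to validate a commit sha and to reject a token passed in its place. A quick standalone check (the 40-character sha below is a made-up example, not a real commit):

def is_hex(s):
    try:
        int(s, 16)
        return True
    except ValueError:
        return False

commit_sha = "deadbeef" * 5  # 40 hex characters, example value only
assert len(commit_sha) == 40 and is_hex(commit_sha)
assert not is_hex("not-a-hex-token")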

View File

@ -7,7 +7,7 @@ import time
from collections import defaultdict
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, Callable
from github import Github
from github.Commit import Commit
@ -176,7 +176,7 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None:
if not [status for status in statuses if status.context == CI.StatusNames.CI]:
# This is the case, when some statuses already exist for the check,
# but not the StatusNames.CI. We should create it as pending.
# but not the CI.StatusNames.CI. We should create it as pending.
# W/o pr_info to avoid recursion, and yes, one extra create_ci_report
post_commit_status(
commit,
@ -226,20 +226,20 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str:
f"\n"
)
# group checks by the name to get the worst one per each
grouped_statuses = {} # type: Dict[CI.CheckDescription, CommitStatuses]
grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses]
for status in statuses:
cd = None
for c in CI.CHECK_DESCRIPTIONS:
for c in CHECK_DESCRIPTIONS:
if c.match_func(status.context):
cd = c
break
if cd is None or cd == CI.CHECK_DESCRIPTIONS[-1]:
if cd is None or cd == CHECK_DESCRIPTIONS[-1]:
# This is the case for either non-found description or a fallback
cd = CI.CheckDescription(
cd = CheckDescription(
status.context,
CI.CHECK_DESCRIPTIONS[-1].description,
CI.CHECK_DESCRIPTIONS[-1].match_func,
CHECK_DESCRIPTIONS[-1].description,
CHECK_DESCRIPTIONS[-1].match_func,
)
if cd in grouped_statuses:
@ -459,7 +459,7 @@ def trigger_mergeable_check(
set_from_sync: bool = False,
workflow_failed: bool = False,
) -> StatusType:
"""calculate and update StatusNames.MERGEABLE"""
"""calculate and update CI.StatusNames.MERGEABLE"""
required_checks = [status for status in statuses if CI.is_required(status.context)]
mergeable_status = None
@ -536,3 +536,188 @@ def update_upstream_sync_status(
get_commit_filtered_statuses(last_synced_upstream_commit),
set_from_sync=True,
)
@dataclass
class CheckDescription:
name: str
description: str # the check descriptions, will be put into the status table
match_func: Callable[[str], bool] # the function to check vs the commit status
def __hash__(self) -> int:
return hash(self.name + self.description)
CHECK_DESCRIPTIONS = [
CheckDescription(
CI.StatusNames.PR_CHECK,
"Checks correctness of the PR's body",
lambda x: x == "PR Check",
),
CheckDescription(
CI.StatusNames.SYNC,
"If it fails, ask a maintainer for help",
lambda x: x == CI.StatusNames.SYNC,
),
CheckDescription(
"AST fuzzer",
"Runs randomly generated queries to catch program errors. "
"The build type is optionally given in parenthesis. "
"If it fails, ask a maintainer for help",
lambda x: x.startswith("AST fuzzer"),
),
CheckDescription(
CI.JobNames.BUGFIX_VALIDATE,
"Checks that either a new test (functional or integration) or there "
"some changed tests that fail with the binary built on master branch",
lambda x: x == CI.JobNames.BUGFIX_VALIDATE,
),
CheckDescription(
CI.StatusNames.CI,
"A meta-check that indicates the running CI. Normally, it's in <b>success</b> or "
"<b>pending</b> state. The failed status indicates some problems with the PR",
lambda x: x == "CI running",
),
CheckDescription(
"Builds",
"Builds ClickHouse in various configurations for use in further steps. "
"You have to fix the builds that fail. Build logs often has enough "
"information to fix the error, but you might have to reproduce the failure "
"locally. The <b>cmake</b> options can be found in the build log, grepping for "
'<b>cmake</b>. Use these options and follow the <a href="'
'https://clickhouse.com/docs/en/development/build">general build process</a>',
lambda x: x.startswith("ClickHouse") and x.endswith("build check"),
),
CheckDescription(
"Compatibility check",
"Checks that <b>clickhouse</b> binary runs on distributions with old libc "
"versions. If it fails, ask a maintainer for help",
lambda x: x.startswith("Compatibility check"),
),
CheckDescription(
CI.JobNames.DOCKER_SERVER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker server"),
),
CheckDescription(
CI.JobNames.DOCKER_KEEPER,
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker keeper"),
),
CheckDescription(
CI.JobNames.DOCS_CHECK,
"Builds and tests the documentation",
lambda x: x == CI.JobNames.DOCS_CHECK,
),
CheckDescription(
CI.JobNames.FAST_TEST,
"Normally this is the first check that is ran for a PR. It builds ClickHouse "
'and runs most of <a href="https://clickhouse.com/docs/en/development/tests'
'#functional-tests">stateless functional tests</a>, '
"omitting some. If it fails, further checks are not started until it is fixed. "
"Look at the report to see which tests fail, then reproduce the failure "
'locally as described <a href="https://clickhouse.com/docs/en/development/'
'tests#functional-test-locally">here</a>',
lambda x: x == CI.JobNames.FAST_TEST,
),
CheckDescription(
"Flaky tests",
"Checks if new added or modified tests are flaky by running them repeatedly, "
"in parallel, with more randomization. Functional tests are run 100 times "
"with address sanitizer, and additional randomization of thread scheduling. "
"Integration tests are run up to 10 times. If at least once a new test has "
"failed, or was too long, this check will be red. We don't allow flaky tests, "
'read <a href="https://clickhouse.com/blog/decorating-a-christmas-tree-with-'
'the-help-of-flaky-tests/">the doc</a>',
lambda x: "tests flaky check" in x,
),
CheckDescription(
"Install packages",
"Checks that the built packages are installable in a clear environment",
lambda x: x.startswith("Install packages ("),
),
CheckDescription(
"Integration tests",
"The integration tests report. In parenthesis the package type is given, "
"and in square brackets are the optional part/total tests",
lambda x: x.startswith("Integration tests ("),
),
CheckDescription(
CI.StatusNames.MERGEABLE,
"Checks if all other necessary checks are successful",
lambda x: x == CI.StatusNames.MERGEABLE,
),
CheckDescription(
"Performance Comparison",
"Measure changes in query performance. The performance test report is "
'described in detail <a href="https://github.com/ClickHouse/ClickHouse/tree'
'/master/docker/test/performance-comparison#how-to-read-the-report">here</a>. '
"In square brackets are the optional part/total tests",
lambda x: x.startswith("Performance Comparison"),
),
CheckDescription(
"Push to Dockerhub",
"The check for building and pushing the CI related docker images to docker hub",
lambda x: x.startswith("Push") and "to Dockerhub" in x,
),
CheckDescription(
"Sqllogic",
"Run clickhouse on the "
'<a href="https://www.sqlite.org/sqllogictest">sqllogic</a> '
"test set against sqlite and checks that all statements are passed",
lambda x: x.startswith("Sqllogic test"),
),
CheckDescription(
"SQLancer",
"Fuzzing tests that detect logical bugs with "
'<a href="https://github.com/sqlancer/sqlancer">SQLancer</a> tool',
lambda x: x.startswith("SQLancer"),
),
CheckDescription(
"Stateful tests",
"Runs stateful functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateful tests ("),
),
CheckDescription(
"Stateless tests",
"Runs stateless functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc",
lambda x: x.startswith("Stateless tests ("),
),
CheckDescription(
"Stress test",
"Runs stateless functional tests concurrently from several clients to detect "
"concurrency-related errors",
lambda x: x.startswith("Stress test ("),
),
CheckDescription(
CI.JobNames.STYLE_CHECK,
"Runs a set of checks to keep the code style clean. If some of tests failed, "
"see the related log from the report",
lambda x: x == CI.JobNames.STYLE_CHECK,
),
CheckDescription(
"Unit tests",
"Runs the unit tests for different release types",
lambda x: x.startswith("Unit tests ("),
),
CheckDescription(
"Upgrade check",
"Runs stress tests on server version from last release and then tries to "
"upgrade it to the version from the PR. It checks if the new server can "
"successfully startup without any errors, crashes or sanitizer asserts",
lambda x: x.startswith("Upgrade check ("),
),
CheckDescription(
"ClickBench",
"Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table",
lambda x: x.startswith("ClickBench"),
),
CheckDescription(
"Fallback for unknown",
"There's no description for the check yet, please add it to "
"tests/ci/ci_config.py:CHECK_DESCRIPTIONS",
lambda x: True,
),
]
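# Illustrative sketch (hypothetical helper name): how CHECK_DESCRIPTIONS is consumed,
# mirroring the matching loop in generate_status_comment. The first entry whose
# match_func accepts a status context wins; the trailing "Fallback for unknown"
# entry matches everything else.
def find_check_description(context: str) -> CheckDescription:
    for c in CHECK_DESCRIPTIONS:
        if c.match_func(context):
            return c
    return CHECK_DESCRIPTIONS[-1]
# e.g. find_check_description("Stateless tests (release)").name == "Stateless tests"
# and find_check_description("Some brand new check").name == "Fallback for unknown"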

View File

@ -23,7 +23,7 @@ from commit_status_helper import (
from get_robot_token import get_best_robot_token
from github_helper import GitHub, NamedUser, PullRequest, Repository
from pr_info import PRInfo
from report import SUCCESS, FAILURE
from report import SUCCESS
from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY
from synchronizer_utils import SYNC_BRANCH_PREFIX
from ci_config import CI
@ -248,23 +248,27 @@ def main():
repo = gh.get_repo(args.repo)
if args.set_ci_status:
CI.GH.print_workflow_results()
# set Mergeable check status and exit
assert args.wf_status in (FAILURE, SUCCESS)
commit = get_commit(gh, args.pr_info.sha)
statuses = get_commit_filtered_statuses(commit)
has_failed_statuses = False
has_native_failed_status = False
for status in statuses:
print(f"Check status [{status.context}], [{status.state}]")
if CI.is_required(status.context) and status.state != SUCCESS:
print(f"WARNING: Failed status [{status.context}], [{status.state}]")
if (
CI.is_required(status.context)
and status.state != SUCCESS
and status.context != CI.StatusNames.SYNC
):
print(
f"WARNING: Not success status [{status.context}], [{status.state}]"
)
has_failed_statuses = True
if status.context != CI.StatusNames.SYNC:
has_native_failed_status = True
if args.wf_status == SUCCESS or has_failed_statuses:
# set Mergeable check if workflow is successful (green)
workflow_ok = CI.is_workflow_ok()
if workflow_ok or has_failed_statuses:
# set Mergeable Check if workflow is successful (green)
# or if we have GH statuses with failures (red)
# to avoid false-green on a died runner
state = trigger_mergeable_check(
@ -283,7 +287,7 @@ def main():
print(
"Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
)
if args.wf_status == SUCCESS and not has_native_failed_status:
if workflow_ok and not has_failed_statuses:
sys.exit(0)
else:
sys.exit(1)
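# Condensed, illustrative restatement of the exit logic above (hypothetical helper):
# the job is green only when the workflow itself is OK and no required status failed.
def _exit_code(workflow_ok: bool, has_failed_statuses: bool) -> int:
    return 0 if workflow_ok and not has_failed_statuses else 1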

View File

@ -22,7 +22,6 @@ from typing import (
from build_download_helper import get_gh_api
from ci_config import CI
from ci_utils import normalize_string
from env_helper import REPORT_PATH, GITHUB_WORKSPACE
logger = logging.getLogger(__name__)
@ -622,7 +621,7 @@ class BuildResult:
def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path:
path = Path(directory) / self.get_report_name(
self.build_name, self.pr_number or normalize_string(self.head_ref)
self.build_name, self.pr_number or CI.Utils.normalize_string(self.head_ref)
)
path.write_text(
json.dumps(

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
import logging
import re
import sys
from typing import Tuple
@ -16,7 +17,6 @@ from commit_status_helper import (
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from ci_config import CI
from ci_utils import Utils
from pr_info import PRInfo
from report import FAILURE, PENDING, SUCCESS, StatusType
@ -25,12 +25,144 @@ TRUSTED_ORG_IDS = {
54801242, # clickhouse
}
TRUSTED_CONTRIBUTORS = {
e.lower()
for e in [
"amosbird",
"azat", # SEMRush
"bharatnc", # Many contributions.
"cwurm", # ClickHouse, Inc
"den-crane", # Documentation contributor
"ildus", # adjust, ex-pgpro
"nvartolomei", # Seasoned contributor, CloudFlare
"taiyang-li",
"ucasFL", # Amos Bird's friend
"thomoco", # ClickHouse, Inc
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse, Inc
"tsolodov", # ClickHouse, Inc
"justindeguzman", # ClickHouse, Inc
"XuJia0210", # ClickHouse, Inc
]
}
OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK}
PR_CHECK = "PR Check"
LABEL_CATEGORIES = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
"pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
}
CATEGORY_TO_LABEL = {
c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
}
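# Illustrative examples of the inverted mapping (values taken from LABEL_CATEGORIES above):
#   CATEGORY_TO_LABEL["New Feature"] == "pr-feature"
#   CATEGORY_TO_LABEL["Not for changelog"] == "pr-not-for-changelog"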
def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]:
return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif "(changelog entry is not required)" in category:
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
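# Hedged usage sketch for check_pr_description (the sample body below is hypothetical):
# the first element of the returned tuple is an error message, empty if the body is
# well-formed, and the second element is the detected changelog category.
def _example_check_pr_description() -> None:
    body = (
        "### Changelog category (leave one):\n"
        "- New Feature\n"
        "\n"
        "### Changelog entry:\n"
        "Added something useful.\n"
    )
    error, category = check_pr_description(body, "ClickHouse/ClickHouse")
    assert error == "" and category == "New Feature"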
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
if pr_user_login.lower() in CI.TRUSTED_CONTRIBUTORS:
if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
logging.info("User '%s' is trusted", pr_user_login)
return True
@ -92,22 +224,20 @@ def main():
commit = get_commit(gh, pr_info.sha)
status = SUCCESS # type: StatusType
description_error, category = Utils.check_pr_description(
pr_info.body, GITHUB_REPOSITORY
)
description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY)
pr_labels_to_add = []
pr_labels_to_remove = []
if (
category in CI.CATEGORY_TO_LABEL
and CI.CATEGORY_TO_LABEL[category] not in pr_info.labels
category in CATEGORY_TO_LABEL
and CATEGORY_TO_LABEL[category] not in pr_info.labels
):
pr_labels_to_add.append(CI.CATEGORY_TO_LABEL[category])
pr_labels_to_add.append(CATEGORY_TO_LABEL[category])
for label in pr_info.labels:
if (
label in CI.CATEGORY_TO_LABEL.values()
and category in CI.CATEGORY_TO_LABEL
and label != CI.CATEGORY_TO_LABEL[category]
label in CATEGORY_TO_LABEL.values()
and category in CATEGORY_TO_LABEL
and label != CATEGORY_TO_LABEL[category]
):
pr_labels_to_remove.append(label)

View File

@ -9,7 +9,7 @@ from ci_settings import CiSettings
from pr_info import PRInfo, EventType
from s3_helper import S3Helper
from ci_cache import CiCache
from ci_utils import normalize_string
from ci_utils import Utils
_TEST_EVENT_JSON = {"dummy": "dummy"}
@ -55,7 +55,7 @@ class TestCIConfig(unittest.TestCase):
if CI.JOB_CONFIGS[job].job_name_keyword:
self.assertTrue(
CI.JOB_CONFIGS[job].job_name_keyword.lower()
in normalize_string(job),
in Utils.normalize_string(job),
f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]",
)
@ -291,7 +291,9 @@ class TestCIConfig(unittest.TestCase):
assert tag_config
set_jobs = tag_config.run_jobs
for job in set_jobs:
if any(k in normalize_string(job) for k in settings.exclude_keywords):
if any(
k in Utils.normalize_string(job) for k in settings.exclude_keywords
):
continue
expected_jobs_to_do.append(job)
for job, config in CI.JOB_CONFIGS.items():
@ -303,12 +305,12 @@ class TestCIConfig(unittest.TestCase):
# expected to run all builds jobs
expected_jobs_to_do.append(job)
if not any(
keyword in normalize_string(job)
keyword in Utils.normalize_string(job)
for keyword in settings.include_keywords
):
continue
if any(
keyword in normalize_string(job)
keyword in Utils.normalize_string(job)
for keyword in settings.exclude_keywords
):
continue

View File

@ -1,7 +1,8 @@
<clickhouse>
<rocksdb>
<options>
<info_log_level>DEBUG_LEVEL</info_log_level>
<!-- https://github.com/ClickHouse/ClickHouse/pull/67274#issuecomment-2255301116 -->
<info_log_level>ERROR_LEVEL</info_log_level>
</options>
</rocksdb>
</clickhouse>

View File

@ -19,6 +19,7 @@ services:
ldapsearch -x -H ldap://localhost:$$LDAP_PORT_NUMBER -D $$LDAP_ADMIN_DN -w $$LDAP_ADMIN_PASSWORD -b $$LDAP_ROOT
| grep -c -E "member: cn=j(ohn|ane)doe"
| grep 2 >> /dev/null
&& cat /run/slapd/slapd.pid
interval: 10s
retries: 10
timeout: 2s

View File

@ -4484,7 +4484,7 @@ class ClickHouseInstance:
use_old_analyzer = os.environ.get("CLICKHOUSE_USE_OLD_ANALYZER") is not None
# If specific version was used there can be no
# allow_experimental_analyzer setting, so do this only if it was
# enable_analyzer setting, so do this only if it was
# explicitly requested.
if self.tag:
use_old_analyzer = False

View File

@ -1,4 +1,5 @@
import uuid
import time
import pytest
from helpers.cluster import ClickHouseCluster
@ -58,12 +59,12 @@ ORDER BY h;"""
== TSV([["backward", "true"], ["current", "true"]])
)
# Should be enabled everywhere
analyzer_enabled = current.query(
# Should be enabled explicitly on the old instance.
analyzer_enabled = backward.query(
f"""
SELECT
DISTINCT Settings['allow_experimental_analyzer']
FROM clusterAllReplicas('test_cluster_mixed', system.query_log)
FROM system.query_log
WHERE initial_query_id = '{query_id}';"""
)
@ -78,6 +79,8 @@ WHERE initial_query_id = '{query_id}';"""
current.query("SYSTEM FLUSH LOGS")
backward.query("SYSTEM FLUSH LOGS")
# The old version doesn't know about the alias.
# For this we will ask about the old experimental name.
assert (
backward.query(
"""
@ -98,3 +101,26 @@ WHERE initial_query_id = '{query_id}';"""
)
assert TSV(analyzer_enabled) == TSV("0")
# Only the new version knows about the alias
# and it will send the old setting `allow_experimental_analyzer`
# to the remote server.
query_id = str(uuid.uuid4())
current.query(
"SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables) SETTINGS enable_analyzer = 1;",
query_id=query_id,
)
current.query("SYSTEM FLUSH LOGS")
backward.query("SYSTEM FLUSH LOGS")
# Should be enabled explicitly everywhere.
analyzer_enabled = current.query(
f"""
SELECT
DISTINCT Settings['allow_experimental_analyzer']
FROM system.query_log
WHERE initial_query_id = '{query_id}';"""
)
assert TSV(analyzer_enabled) == TSV("1")

View File

@ -31,6 +31,9 @@ def started_cluster():
def test_distributed_type_object(started_cluster):
node1.query("TRUNCATE TABLE local_table")
node2.query("TRUNCATE TABLE local_table")
node1.query(
'INSERT INTO local_table FORMAT JSONEachRow {"id": 1, "data": {"k1": 10}}'
)
@ -89,7 +92,7 @@ def test_distributed_type_object(started_cluster):
assert (
TSV(
node1.query(
"SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM dist_table ORDER BY id SETTINGS allow_experimental_analyzer = 0"
"SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM dist_table ORDER BY id SETTINGS enable_analyzer = 0"
)
)
== expected

View File

@ -1,7 +1,7 @@
<clickhouse>
<profiles>
<default>
<allow_experimental_analyzer>1</allow_experimental_analyzer>
<enable_analyzer>1</enable_analyzer>
<allow_experimental_parallel_reading_from_replicas>1</allow_experimental_parallel_reading_from_replicas>
<cluster_for_parallel_replicas>default</cluster_for_parallel_replicas>
<max_parallel_replicas>100</max_parallel_replicas>

View File

@ -459,7 +459,7 @@ def test_show_profiles():
query_possible_response = [
"CREATE SETTINGS PROFILE `default`\n",
"CREATE SETTINGS PROFILE `default` SETTINGS allow_experimental_analyzer = true\n",
"CREATE SETTINGS PROFILE `default` SETTINGS enable_analyzer = true\n",
]
assert (
instance.query("SHOW CREATE SETTINGS PROFILE default")
@ -470,7 +470,7 @@ def test_show_profiles():
"CREATE SETTINGS PROFILE `default`\n"
"CREATE SETTINGS PROFILE `readonly` SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE `xyz`\n",
"CREATE SETTINGS PROFILE `default` SETTINGS allow_experimental_analyzer = true\n"
"CREATE SETTINGS PROFILE `default` SETTINGS enable_analyzer = true\n"
"CREATE SETTINGS PROFILE `readonly` SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE `xyz`\n",
]
@ -482,7 +482,7 @@ def test_show_profiles():
"CREATE SETTINGS PROFILE `xyz`\n"
)
expected_access_analyzer = (
"CREATE SETTINGS PROFILE `default` SETTINGS allow_experimental_analyzer = true\n"
"CREATE SETTINGS PROFILE `default` SETTINGS enable_analyzer = true\n"
"CREATE SETTINGS PROFILE `readonly` SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE `xyz`\n"
)

View File

@ -71,6 +71,8 @@ function configure
{
# Use the new config for both servers, so that we can change it in a PR.
rm right/config/config.d/text_log.xml ||:
# backups disk uses absolute path, and this overlaps between servers, that could lead to errors
rm right/config/config.d/backups.xml ||:
cp -rv right/config left ||:
# Start a temporary server to rename the tables

View File

@ -16,4 +16,4 @@
<query>SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null;</query>
<query>SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS
allow_experimental_analyzer=1</query>
</test>
</test>

View File

@ -28,8 +28,8 @@ RENAME TABLE set2 TO set;
SELECT arrayJoin(['Hello', 'test', 'World', 'world', 'abc', 'xyz']) AS s WHERE s IN set;
create table tab (x String) engine = MergeTree order by x as select 'Hello';
SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings allow_experimental_analyzer=0;
SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings allow_experimental_analyzer=1;
SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings enable_analyzer=0;
SELECT * FROM tab PREWHERE x IN (set) WHERE x IN (set) LIMIT 1 settings enable_analyzer=1;
DROP TABLE tab;
DROP TABLE set;

View File

@ -3,5 +3,6 @@ SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN
SET join_algorithm = 'auto';
SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2;
-- Just to test that we preserved the old setting name, we use `allow_experimental_analyzer` instead of `enable_analyzer` here.
SET allow_experimental_analyzer = 1;
SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2;

View File

@ -1,6 +1,6 @@
-- Tags: shard
set allow_experimental_analyzer = 1;
set enable_analyzer = 1;
set enable_positional_arguments = 0;
select 40 as z from (select * from system.numbers limit 3) group by z;

View File

@ -4,10 +4,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1 AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1234567890123 AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSON";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toFloat32(1.23) AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSONCompact";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1 AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1234567890123 AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSON";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toFloat32(1.23) AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSONCompact";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDate('2010-01-01') AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDateTime('2010-01-01 01:02:03', 'UTC') AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSON";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1.1 AS k, count() GROUP BY k WITH TOTALS SETTINGS allow_experimental_analyzer = 1 FORMAT JSONCompact";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDate('2010-01-01') AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDateTime('2010-01-01 01:02:03', 'UTC') AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSON";
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1.1 AS k, count() GROUP BY k WITH TOTALS SETTINGS enable_analyzer = 1 FORMAT JSONCompact";

View File

@ -1,3 +1,7 @@
-- Tags: no-msan
-- ^
-- makes SELECTs extremely slow sometimes for some reason: "Aggregated. 1000000 to 1 rows (from 7.63 MiB) in 242.829221645 sec."
DROP TABLE IF EXISTS sample_00314_1;
DROP TABLE IF EXISTS sample_00314_2;
DROP TABLE IF EXISTS sample_merge_00314;

View File

@ -1,8 +1,8 @@
SET allow_experimental_analyzer = 1;
SET enable_analyzer = 1;
-- https://github.com/ClickHouse/ClickHouse/issues/45804
CREATE TABLE myRMT(
CREATE TABLE myRMT(
key Int64,
someCol String,
ver DateTime

View File

@ -1,6 +1,6 @@
SET any_join_distinct_right_table_keys = 1;
SET joined_subquery_requires_alias = 0;
SET allow_experimental_analyzer = 1;
SET enable_analyzer = 1;
select x, y from (select 1 as x, 2 as y, x, y);
select x, y from (select 1 as x, 1 as y, x, y);

View File

@ -2,7 +2,7 @@
SET output_format_write_statistics = 0;
SET extremes = 1;
SET allow_experimental_analyzer = 1;
SET enable_analyzer = 1;
SET output_format_json_quote_64bit_integers = 1;
SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple GROUP BY i0, u0, ip, in, up, arr, tuple WITH TOTALS FORMAT JSON;

View File

@ -1,4 +1,4 @@
SET allow_experimental_analyzer = 1;
SET enable_analyzer = 1;
SET join_use_nulls = 0;
SET any_join_distinct_right_table_keys = 1;

Some files were not shown because too many files have changed in this diff