From af1cdb4aa2e74e995cb155d6fda4f9069321f387 Mon Sep 17 00:00:00 2001 From: maks-buren630501 Date: Tue, 30 Aug 2022 15:13:22 +0300 Subject: [PATCH 01/83] Fix bug with materialized_view and postgresql replication --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index d71d6901cee..8001bd7607a 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -340,7 +340,8 @@ Chain buildPushingToViewsChain( chains.emplace_back(std::move(out)); /// Add the view to the query access info so it can appear in system.query_log - if (!no_destination) + /// hasQueryContext - for materialized tables with background replication process query context is not added + if (!no_destination && context->hasQueryContext()) { context->getQueryContext()->addQueryAccessInfo( backQuoteIfNeed(database_table.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", database_table.getFullTableName()); @@ -698,7 +699,6 @@ IProcessor::Status FinalizingViewsTransform::prepare() output.finish(); return Status::Finished; } - return Status::NeedData; } From 64df6ca5209949fe80ca798eb82dc386d5e74652 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 3 Jan 2023 19:22:31 +0000 Subject: [PATCH 02/83] Split stress test and backward compatibility check --- docker/test/{stress/stress => ci} | 0 docker/test/stress/Dockerfile | 1 - docker/test/stress/run.sh | 251 +--------- docker/test/upgrade/Dockerfile | 31 ++ docker/test/upgrade/run.sh | 462 ++++++++++++++++++ tests/ci/stress.py | 305 ++++++++++++ tests/ci/stress_check.py | 8 +- tests/ci/upgrade_check.py | 4 + tests/clickhouse-test | 45 +- .../0_stateless/00061_merge_tree_alter.sql | 2 +- ..._replace_partition_from_table_zookeeper.sh | 2 +- ...ost_part_and_alive_part_zookeeper_long.sql | 2 +- .../0_stateless/00942_dataparts_500.sh | 2 +- ...tem_parts_race_condition_drop_zookeeper.sh | 2 +- ...9_parallel_alter_detach_table_zookeeper.sh | 2 +- .../01111_create_drop_replicated_db_stress.sh | 2 +- .../0_stateless/01191_rename_dictionary.sql | 2 +- ...18_long_unsuccessful_mutation_zookeeper.sh | 2 +- .../01378_alter_rename_with_ttl_zookeeper.sql | 2 +- .../0_stateless/01391_join_on_dict_crash.sql | 2 +- .../01555_system_distribution_queue_mask.sql | 2 +- .../01576_alter_low_cardinality_and_select.sh | 2 +- ...ch_patition_with_macro_in_zk_path_long.sql | 2 +- .../0_stateless/01780_column_sparse_alter.sql | 2 +- .../02022_storage_filelog_one_file.sh | 2 +- .../02025_storage_filelog_virtual_col.sh | 2 +- .../0_stateless/02067_lost_part_s3.sql | 2 +- ...2_create_table_without_columns_metadata.sh | 2 +- .../0_stateless/02242_delete_user_race.sh | 2 +- .../0_stateless/02243_drop_user_grant_race.sh | 2 +- tests/queries/0_stateless/02293_hashid.sql | 2 +- .../02302_join_auto_lc_nullable_bug.sql | 2 +- .../02306_window_move_row_number_fix.sql | 2 +- .../02313_cross_join_dup_col_names.sql | 2 +- .../02315_pmj_union_ubsan_35857.sql | 2 +- ...2316_cast_to_ip_address_default_column.sql | 3 - .../02316_const_string_intersact.sql | 2 +- ...02320_mapped_array_witn_const_nullable.sql | 2 +- .../02332_dist_insert_send_logs_level.sh | 2 +- ...45_partial_sort_transform_optimization.sql | 2 +- tests/queries/0_stateless/02354_annoy.sh | 2 +- .../0_stateless/02363_mapupdate_improve.sql | 2 +- .../02366_direct_dictionary_dict_has.sql | 
2 +- .../0_stateless/02366_with_fill_date.sql | 2 +- .../02381_compress_marks_and_primary_key.sql | 2 +- ...397_system_parts_race_condition_drop_rm.sh | 2 +- .../02429_low_cardinality_trash.sh | 2 +- .../02450_kill_distributed_query_deadlock.sh | 2 +- 48 files changed, 873 insertions(+), 313 deletions(-) rename docker/test/{stress/stress => ci} (100%) create mode 100644 docker/test/upgrade/Dockerfile create mode 100644 docker/test/upgrade/run.sh create mode 100755 tests/ci/stress.py create mode 100644 tests/ci/upgrade_check.py diff --git a/docker/test/stress/stress b/docker/test/ci similarity index 100% rename from docker/test/stress/stress rename to docker/test/ci diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 393508fd551..2778b63774d 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -24,7 +24,6 @@ RUN apt-get update -y \ llvm-9 \ brotli -COPY ./stress /stress COPY run.sh / ENV DATASETS="hits visits" diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 41245013a4a..c99de2b93e5 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -28,6 +28,7 @@ function configure() /usr/share/clickhouse-test/config/install.sh # we mount tests folder from repo to /usr/share + ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag @@ -278,7 +279,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau start -./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ +stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \ || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv @@ -292,14 +293,19 @@ unset "${!THREAD_@}" start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \ + || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.final.log)\tFAIL' >> /test_output/test_results.tsv \ && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) +# Remove file application_errors.txt if it's empty +[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt + stop [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log + # Grep logs for sanitizer asserts, crashes and other critical errors # Sanitizer asserts @@ -312,12 +318,12 @@ rm -f /test_output/tmp # OOM zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No OOM messages in 
clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e 'OOM killer (or signal 9) in server logs\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'No OOM messages in server logs\tOK' >> /test_output/test_results.tsv # Logical errors zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \ - && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && echo -e 'Logical error thrown (see server logs or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv # Remove file logical_errors.txt if it's empty @@ -325,7 +331,7 @@ zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-serve # No such key errors zgrep -Ea "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ - && echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && echo -e 'S3_ERROR No such key thrown (see server logs or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv # Remove file no_such_key_errors.txt if it's empty @@ -333,13 +339,13 @@ zgrep -Ea "Code: 499.*The specified key does not exist" /var/log/clickhouse-serv # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + && echo -e 'Killed by signal (in server logs)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \ - && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e 'Fatal message in server logs (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'No fatal messages in server logs\tOK' >> /test_output/test_results.tsv # Remove file fatal_messages.txt if it's empty [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt @@ -350,229 +356,12 @@ zgrep -Fa "########################################" /test_output/* > /dev/null zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv -if [ "$DISABLE_BC_CHECK" -ne "1" ]; then - echo -e "Backward compatibility check\n" +for table in query_log trace_log +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: +done - echo "Get previous release tag" - previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) - echo $previous_release_tag - - echo "Clone previous release repository" - git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository - - echo "Download 
clickhouse-server from the previous release" - mkdir previous_release_package_folder - - echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv - - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log - for table in query_log trace_log - do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: - done - - tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: - - # Check if we cloned previous release repository successfully - if ! [ "$(ls -A previous_release_repository/tests/queries)" ] - then - echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv - elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] - then - echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv - else - echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv - echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv - - # Uninstall current packages - dpkg --remove clickhouse-client - dpkg --remove clickhouse-server - dpkg --remove clickhouse-common-static-dbg - dpkg --remove clickhouse-common-static - - rm -rf /var/lib/clickhouse/* - - # Make BC check more funny by forcing Ordinary engine for system database - mkdir /var/lib/clickhouse/metadata - echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql - - # Install previous release packages - install_packages previous_release_package_folder - - # Start server from previous release - # Previous version may not be ready for fault injections - export ZOOKEEPER_FAULT_INJECTION=0 - configure - - # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." - rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: - rm -f /etc/clickhouse-server/users.d/marks.xml ||: - - # Remove s3 related configs to avoid "there is no disk type `cache`" - rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: - rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: - - # Turn on after 22.12 - rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: - # it uses recently introduced settings which previous versions may not have - rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: - - start - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Install new package before running stress test because we should use new - # clickhouse-client and new clickhouse-test. - # - # But we should leave old binary in /usr/bin/ and debug symbols in - # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it - # will print sane stacktraces and also to avoid possible crashes. - # - # FIXME: those files can be extracted directly from debian package, but - # actually better solution will be to use different PATH instead of playing - # games with files from packages. 
- mv /usr/bin/clickhouse previous_release_package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ - install_packages package_folder - mv /usr/bin/clickhouse package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ - mv previous_release_package_folder/clickhouse /usr/bin/ - mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - - mkdir tmp_stress_output - - ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv - rm -rf tmp_stress_output - - # We experienced deadlocks in this command in very rare cases. Let's debug it: - timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" || - ( - echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log - timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log - clickhouse stop --force - ) - - stop 1 - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log - - # Start new server - mv package_folder/clickhouse /usr/bin/ - mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). - export ZOOKEEPER_FAULT_INJECTION=0 - configure - start 500 - clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ - && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Let the server run for a while before checking log. - sleep 60 - - stop - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log - - # Error messages (we should ignore some errors) - # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") - # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. - # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") - # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected - # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility - echo "Check for Error messages in server log:" - zgrep -Fav -e "Code: 236. 
DB::Exception: Cancelled merging parts" \ - -e "Code: 236. DB::Exception: Cancelled mutating parts" \ - -e "REPLICA_IS_ALREADY_ACTIVE" \ - -e "REPLICA_ALREADY_EXISTS" \ - -e "ALL_REPLICAS_LOST" \ - -e "DDLWorker: Cannot parse DDL task query" \ - -e "RaftInstance: failed to accept a rpc connection due to error 125" \ - -e "UNKNOWN_DATABASE" \ - -e "NETWORK_ERROR" \ - -e "UNKNOWN_TABLE" \ - -e "ZooKeeperClient" \ - -e "KEEPER_EXCEPTION" \ - -e "DirectoryMonitor" \ - -e "TABLE_IS_READ_ONLY" \ - -e "Code: 1000, e.code() = 111, Connection refused" \ - -e "UNFINISHED" \ - -e "NETLINK_ERROR" \ - -e "Renaming unexpected part" \ - -e "PART_IS_TEMPORARILY_LOCKED" \ - -e "and a merge is impossible: we didn't find" \ - -e "found in queue and some source parts for it was lost" \ - -e "is lost forever." \ - -e "Unknown index: idx." \ - -e "Cannot parse string 'Hello' as UInt64" \ - -e "} TCPHandler: Code:" \ - -e "} executeQuery: Code:" \ - -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "This engine is deprecated and is not supported in transactions" \ - -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ - -e "The set of parts restored in place of" \ - -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ - -e "Code: 269. DB::Exception: Destination table is myself" \ - -e "Coordination::Exception: Connection loss" \ - -e "MutateFromLogEntryTask" \ - -e "No connection to ZooKeeper, cannot get shared table ID" \ - -e "Session expired" \ - /var/log/clickhouse-server/clickhouse-server.backward.dirty.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ - && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_error_messages.txt if it's empty - [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt - - # Sanitizer asserts - zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv - rm -f /test_output/tmp - - # OOM - zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Logical errors - echo "Check for Logical errors in server log:" - zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ - && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No logical errors\tOK' >> 
/test_output/test_results.tsv - - # Remove file bc_check_logical_errors.txt if it's empty - [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt - - # Crash - zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv - - # It also checks for crash without stacktrace (printed by watchdog) - echo "Check for Fatal message in server log:" - zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ - && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_fatal_messages.txt if it's empty - [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt - - tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: - for table in query_log trace_log - do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: - done - fi -fi +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: dmesg -T > /test_output/dmesg.log diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile new file mode 100644 index 00000000000..c98220b3403 --- /dev/null +++ b/docker/test/upgrade/Dockerfile @@ -0,0 +1,31 @@ +# rebuild in #33610 +# docker build -t clickhouse/upgrade-check . 
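+#
+# A rough local invocation could look like the following (illustrative paths
+# only; in CI the volumes and environment are wired up by tests/ci/stress_check.py):
+#   docker run --cap-add=SYS_PTRACE --privileged \
+#       --volume=$PWD/packages:/package_folder \
+#       --volume=$PWD/test_output:/test_output \
+#       --volume=$PWD/ClickHouse/tests:/usr/share/clickhouse-test \
+#       clickhouse/upgrade-check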
+ARG FROM_TAG=latest
+FROM clickhouse/stateful-test:$FROM_TAG
+
+RUN apt-get update -y \
+    && env DEBIAN_FRONTEND=noninteractive \
+        apt-get install --yes --no-install-recommends \
+            bash \
+            tzdata \
+            fakeroot \
+            debhelper \
+            parallel \
+            expect \
+            python3 \
+            python3-lxml \
+            python3-termcolor \
+            python3-requests \
+            curl \
+            sudo \
+            openssl \
+            netcat-openbsd \
+            telnet \
+            llvm-9 \
+            brotli
+
+COPY run.sh /
+
+ENV EXPORT_S3_STORAGE_POLICIES=1
+
+CMD ["/bin/bash", "/run.sh"]
diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh
new file mode 100644
index 00000000000..1a107b6df2a
--- /dev/null
+++ b/docker/test/upgrade/run.sh
@@ -0,0 +1,462 @@
+#!/bin/bash
+# shellcheck disable=SC2094
+# shellcheck disable=SC2086
+# shellcheck disable=SC2024
+
+# This script is similar to script for common stress test
+
+# Avoid overlaps with previous runs
+dmesg --clear
+
+set -x
+
+# core.COMM.PID-TID
+sysctl kernel.core_pattern='core.%e.%p-%P'
+
+
+function install_packages()
+{
+    dpkg -i $1/clickhouse-common-static_*.deb
+    dpkg -i $1/clickhouse-common-static-dbg_*.deb
+    dpkg -i $1/clickhouse-server_*.deb
+    dpkg -i $1/clickhouse-client_*.deb
+}
+
+function configure()
+{
+    # install test configs
+    export USE_DATABASE_ORDINARY=1
+    export EXPORT_S3_STORAGE_POLICIES=1
+    /usr/share/clickhouse-test/config/install.sh
+
+    # we mount tests folder from repo to /usr/share
+    ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
+    ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
+    ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
+
+    # avoid too slow startup
+    sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|<session_timeout_ms>100000</session_timeout_ms>|<session_timeout_ms>10000</session_timeout_ms>|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
+    sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
+    sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
+    sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
+
+    # for clickhouse-server (via service)
+    echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
+    # for clickhouse-client
+    export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
+
+    # since we run clickhouse from root
+    sudo chown root: /var/lib/clickhouse
+
+    # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
+    echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
+        > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
+
+    local total_mem
+    total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
+    total_mem=$(( total_mem*1024 )) # bytes
+    # Set maximum memory usage as half of total memory (less chance of OOM).
+    #
+    # But not via max_server_memory_usage but via max_memory_usage_for_user,
+    # so that we can override this setting and execute service queries, like:
+    # - hung check
+    # - show/drop database
+    # - ...
+    #
+    # So max_memory_usage_for_user will be a soft limit, and
+    # max_server_memory_usage will be hard limit, and queries that should be
+    # executed regardless memory limits will use max_memory_usage_for_user=0,
+    # instead of relying on max_untracked_memory
+    local max_server_mem
+    max_server_mem=$((total_mem*75/100)) # 75%
+    echo "Setting max_server_memory_usage=$max_server_mem"
+    cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
+<clickhouse>
+    <max_server_memory_usage>${max_server_mem}</max_server_memory_usage>
+</clickhouse>
+EOL
+    local max_users_mem
+    max_users_mem=$((total_mem*50/100)) # 50%
+    echo "Setting max_memory_usage_for_user=$max_users_mem"
+    cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
+<clickhouse>
+    <profiles>
+        <default>
+            <max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
+        </default>
+    </profiles>
+</clickhouse>
+EOL
+
+    cat > /etc/clickhouse-server/config.d/core.xml <<EOL
+<clickhouse>
+    <core_dump>
+        <!-- 100GiB -->
+        <size_limit>107374182400</size_limit>
+    </core_dump>
+    <core_path>$PWD</core_path>
+</clickhouse>
+EOL
+
+    # Analyzer is not yet ready for testing
+    cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
+<clickhouse>
+    <profiles>
+        <default>
+            <constraints>
+                <allow_experimental_analyzer>
+                    <readonly/>
+                </allow_experimental_analyzer>
+            </constraints>
+        </default>
+    </profiles>
+</clickhouse>
+EOL
+
+}
+
+function stop()
+{
+    local pid
+    # Preserve the pid, since the server can hung after the PID will be deleted.
+    pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
+
+    clickhouse stop $max_tries --do-not-kill && return
+
+    if [ -n "$1" ]
+    then
+        # temporarily disable it in BC check
+        clickhouse stop --force
+        return
+    fi
+
+    # We failed to stop the server with SIGTERM. Maybe it hung, let's collect stacktraces.
+    kill -TERM "$(pidof gdb)" ||:
+    sleep 5
+    echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
+    timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
+    clickhouse stop --force
+}
+
+function start()
+{
+    counter=0
+    until clickhouse-client --query "SELECT 1"
+    do
+        if [ "$counter" -gt ${1:-120} ]
+        then
+            echo "Cannot start clickhouse-server"
+            echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv
+            cat /var/log/clickhouse-server/stdout.log
+            tail -n1000 /var/log/clickhouse-server/stderr.log
+            tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n1000
+            break
+        fi
+        # use root to match with current uid
+        clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
+        sleep 0.5
+        counter=$((counter + 1))
+    done
+
+    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
+    # and clickhouse-server can do fork-exec, for example, to run some bridge.
+    # Do not set nostop noprint for all signals, because some it may cause gdb to hang,
+    # explicitly ignore non-fatal signals that are used by server.
+    # Number of SIGRTMIN can be determined only in runtime.
+    RTMIN=$(kill -l SIGRTMIN)
+    echo "
+set follow-fork-mode parent
+handle SIGHUP nostop noprint pass
+handle SIGINT nostop noprint pass
+handle SIGQUIT nostop noprint pass
+handle SIGPIPE nostop noprint pass
+handle SIGTERM nostop noprint pass
+handle SIGUSR1 nostop noprint pass
+handle SIGUSR2 nostop noprint pass
+handle SIG$RTMIN nostop noprint pass
+info signals
+continue
+backtrace full
+thread apply all backtrace full
+info registers
+disassemble /s
+up
+disassemble /s
+up
+disassemble /s
+p \"done\"
+detach
+quit
+" > script.gdb
+
+    # FIXME Hung check may work incorrectly because of attached gdb
+    # 1. False positives are possible
+    # 2.
We cannot attach another gdb to get stacktraces if some queries hung + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & + sleep 5 + # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) + time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: +} + +# Thread Fuzzer allows to check more permutations of possible thread scheduling +# and find more potential issues. +# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 +is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") +if [ "$is_tsan_build" -eq "0" ]; then + export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 + export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 + export THREAD_FUZZER_SLEEP_TIME_US=100000 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 +fi + +azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & +./setup_minio.sh stateless # to have a proper environment + +# But we still need default disk because some tables loaded only into it +sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
+mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+
+echo "Get previous release tag"
+previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | get_previous_release_tag)
+echo $previous_release_tag
+
+echo "Clone previous release repository"
+git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
+
+echo "Download clickhouse-server from the previous release"
+mkdir previous_release_package_folder
+
+echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
+    || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
+
+# Check if we cloned previous release repository successfully
+if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
+then
+    echo -e "Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv
+elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
+then
+    echo -e "Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv
+else
+    echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv
+    echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv
+
+    # Make upgrade check more funny by forcing Ordinary engine for system database
+    mkdir /var/lib/clickhouse/metadata
+    echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
+
+    # Install previous release packages
+    install_packages previous_release_package_folder
+
+    # Start server from previous release
+    # Previous version may not be ready for fault injections
+    export ZOOKEEPER_FAULT_INJECTION=0
+    configure
+
+    # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
+    rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
+    rm -f /etc/clickhouse-server/users.d/marks.xml ||:
+
+    # Remove s3 related configs to avoid "there is no disk type `cache`"
+    rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
+    rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
+
+    # Turn on after 22.12
+    rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
+    # it uses recently introduced settings which previous versions may not have
+    rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
+
+    start
+
+    clickhouse-client --query="SELECT 'Server version: ', version()"
+
+    # Install new package before running stress test because we should use new
+    # clickhouse-client and new clickhouse-test.
+    #
+    # But we should leave old binary in /usr/bin/ and debug symbols in
+    # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it
+    # will print sane stacktraces and also to avoid possible crashes.
+    #
+    # FIXME: those files can be extracted directly from debian package, but
+    # actually better solution will be to use different PATH instead of playing
+    # games with files from packages.
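+    # The binary shuffle below, in effect (sketch):
+    #   1. park the OLD binary and its debug info in previous_release_package_folder/
+    #   2. install the NEW packages, then park the NEW binary in package_folder/
+    #   3. put the OLD binary back so the old server keeps running under stress
+    # (the NEW binary is restored from package_folder/ before the new server starts)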
+ mv /usr/bin/clickhouse previous_release_package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ + install_packages package_folder + mv /usr/bin/clickhouse package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ + mv previous_release_package_folder/clickhouse /usr/bin/ + mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + + mkdir tmp_stress_output + + stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv + rm -rf tmp_stress_output + + # We experienced deadlocks in this command in very rare cases. Let's debug it: + timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" || + ( + echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + clickhouse stop --force + ) + + stop 1 + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log + + # Start new server + mv package_folder/clickhouse /usr/bin/ + mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). + export ZOOKEEPER_FAULT_INJECTION=0 + configure + start 500 + clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || (echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/application_errors.txt) + + # Remove file application_errors.txt if it's empty + [ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Let the server run for a while before checking log. + sleep 60 + + stop + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log + + # Error messages (we should ignore some errors) + # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") + # FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server. + # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") + # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected + # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility + echo "Check for Error messages in server log:" + zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ + -e "Code: 236. 
DB::Exception: Cancelled mutating parts" \
+        -e "REPLICA_IS_ALREADY_ACTIVE" \
+        -e "REPLICA_ALREADY_EXISTS" \
+        -e "ALL_REPLICAS_LOST" \
+        -e "DDLWorker: Cannot parse DDL task query" \
+        -e "RaftInstance: failed to accept a rpc connection due to error 125" \
+        -e "UNKNOWN_DATABASE" \
+        -e "NETWORK_ERROR" \
+        -e "UNKNOWN_TABLE" \
+        -e "ZooKeeperClient" \
+        -e "KEEPER_EXCEPTION" \
+        -e "DirectoryMonitor" \
+        -e "TABLE_IS_READ_ONLY" \
+        -e "Code: 1000, e.code() = 111, Connection refused" \
+        -e "UNFINISHED" \
+        -e "NETLINK_ERROR" \
+        -e "Renaming unexpected part" \
+        -e "PART_IS_TEMPORARILY_LOCKED" \
+        -e "and a merge is impossible: we didn't find" \
+        -e "found in queue and some source parts for it was lost" \
+        -e "is lost forever." \
+        -e "Unknown index: idx." \
+        -e "Cannot parse string 'Hello' as UInt64" \
+        -e "} TCPHandler: Code:" \
+        -e "} executeQuery: Code:" \
+        -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
+        -e "This engine is deprecated and is not supported in transactions" \
+        -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
+        -e "The set of parts restored in place of" \
+        -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
+        -e "Code: 269. DB::Exception: Destination table is myself" \
+        -e "Coordination::Exception: Connection loss" \
+        -e "MutateFromLogEntryTask" \
+        -e "No connection to ZooKeeper, cannot get shared table ID" \
+        -e "Session expired" \
+        /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
+        && echo -e 'Error message in logs after server upgrade (see upgrade_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'No Error messages after server upgrade\tOK' >> /test_output/test_results.tsv
+
+    # Remove file upgrade_error_messages.txt if it's empty
+    [ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
+
+    # Sanitizer asserts
+    zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
+    zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
+    zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
+        && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
+    rm -f /test_output/tmp
+
+    # OOM
+    zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \
+        && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
+
+    # Logical errors
+    echo "Check for Logical errors in server log:"
+    zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/logical_errors.txt \
+        && echo -e 'Logical error thrown (see server logs or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
+
+    # Remove file logical_errors.txt if it's empty
+    [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
+
+    # Crash
+    zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \
+        && echo -e 'Killed by signal (in server logs)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
+
+    # It also checks for crash without stacktrace (printed by watchdog)
+    echo "Check for Fatal message in server log:"
+    zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/fatal_messages.txt \
+        && echo -e 'Fatal message in server logs (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'No fatal messages in server logs\tOK' >> /test_output/test_results.tsv
+
+    # Remove file fatal_messages.txt if it's empty
+    [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
+
+    tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
+    for table in query_log trace_log
+    do
+        clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||:
+    done
+fi
+
+dmesg -T > /test_output/dmesg.log
+
+# OOM in dmesg -- those are real
+grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
+    && echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
+    || echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
+
+mv /var/log/clickhouse-server/stderr.log /test_output/
+
+# Write check result into check_status.tsv
+clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
+[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
+
+# Core dumps
+for core in core.*; do
+    pigz $core
+    mv $core.gz /test_output/
+done
diff --git a/tests/ci/stress.py b/tests/ci/stress.py
new file mode 100755
index 00000000000..ae35afbc5fa
--- /dev/null
+++ b/tests/ci/stress.py
@@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from multiprocessing import cpu_count
+from subprocess import Popen, call, check_output, STDOUT
+import os
+import argparse
+import logging
+import time
+
+
+def get_options(i, upgrade_check):
+    options = []
+    client_options = []
+    if 0 < i:
+        options.append("--order=random")
+
+    if i % 3 == 2 and not upgrade_check:
+        options.append(
+            '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
+        )
+        client_options.append("allow_experimental_database_replicated=1")
+
+    # If database name is not specified, a new database is created for each functional test.
+    # Run some threads with one database for all tests.
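+    # For example (illustrative): get_options(5, upgrade_check=False) yields
+    #   --order=random --db-engine="Replicated('/test/db/test_5', 's1', 'r1')"
+    #   --database=test_5 --client-option allow_experimental_database_replicated=1
+    #   join_algorithm='full_sorting_merge' group_by_use_nulls=1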
+ if i % 2 == 1: + options.append(" --database=test_{}".format(i)) + + if i % 3 == 1: + client_options.append("join_use_nulls=1") + + if i % 2 == 1: + join_alg_num = i // 2 + if join_alg_num % 4 == 0: + client_options.append("join_algorithm='parallel_hash'") + if join_alg_num % 4 == 1: + client_options.append("join_algorithm='partial_merge'") + if join_alg_num % 4 == 2: + client_options.append("join_algorithm='full_sorting_merge'") + if join_alg_num % 4 == 3: + client_options.append("join_algorithm='auto'") + client_options.append('max_rows_in_join=1000') + + if i == 13: + client_options.append("memory_tracker_fault_probability=0.001") + + if i % 2 == 1 and not upgrade_check: + client_options.append("group_by_use_nulls=1") + + if client_options: + options.append(" --client-option " + " ".join(client_options)) + + return " ".join(options) + + +def run_func_test( + cmd, + output_prefix, + num_processes, + skip_tests_option, + global_time_limit, + upgrade_check, +): + upgrade_check_option = ( + "--upgrade-check" if upgrade_check else "" + ) + global_time_limit_option = "" + if global_time_limit: + global_time_limit_option = "--global_time_limit={}".format(global_time_limit) + + output_paths = [ + os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) + for i in range(num_processes) + ] + pipes = [] + for i in range(0, len(output_paths)): + f = open(output_paths[i], "w") + full_command = "{} {} {} {} {} --stress".format( + cmd, + get_options(i, upgrade_check), + global_time_limit_option, + skip_tests_option, + upgrade_check_option, + ) + logging.info("Run func tests '%s'", full_command) + p = Popen(full_command, shell=True, stdout=f, stderr=f) + pipes.append(p) + time.sleep(0.5) + return pipes + + +def compress_stress_logs(output_path, files_prefix): + cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*" + check_output(cmd, shell=True) + + +def call_with_retry(query, timeout=30, retry_count=5): + for i in range(retry_count): + code = call(query, shell=True, stderr=STDOUT, timeout=timeout) + if code != 0: + time.sleep(i) + else: + break + + +def make_query_command(query): + return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0""" + + +def prepare_for_hung_check(drop_databases): + # FIXME this function should not exist, but... + + # We attach gdb to clickhouse-server before running tests + # to print stacktraces of all crashes even if clickhouse cannot print it for some reason. + # However, it obstruct checking for hung queries. + logging.info("Will terminate gdb (if any)") + call_with_retry("kill -TERM $(pidof gdb)") + + # ThreadFuzzer significantly slows down server and causes false-positive hung check failures + call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") + + call_with_retry(make_query_command("SELECT 1 FORMAT Null")) + + # Some tests execute SYSTEM STOP MERGES or similar queries. + # It may cause some ALTERs to hang. + # Possibly we should fix tests and forbid to use such queries without specifying table. 
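+    # Sketch of the failure mode this guards against (hypothetical test session):
+    #   SYSTEM STOP MERGES;                  -- global, not scoped to one table
+    #   ALTER TABLE t UPDATE x = 1 WHERE 1;  -- the mutation then waits forever
+    # hence every background operation type is restarted below.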
+    call_with_retry(make_query_command("SYSTEM START MERGES"))
+    call_with_retry(make_query_command("SYSTEM START DISTRIBUTED SENDS"))
+    call_with_retry(make_query_command("SYSTEM START TTL MERGES"))
+    call_with_retry(make_query_command("SYSTEM START MOVES"))
+    call_with_retry(make_query_command("SYSTEM START FETCHES"))
+    call_with_retry(make_query_command("SYSTEM START REPLICATED SENDS"))
+    call_with_retry(make_query_command("SYSTEM START REPLICATION QUEUES"))
+    call_with_retry(make_query_command("SYSTEM DROP MARK CACHE"))
+
+    # Issue #21004, live views are experimental, so let's just suppress it
+    call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'"))
+
+    # Kill other queries which are known to be slow
+    # It's a query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
+    call_with_retry(
+        make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'")
+    )
+    # Long query from 00084_external_agregation
+    call_with_retry(
+        make_query_command(
+            "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'"
+        )
+    )
+
+    if drop_databases:
+        for i in range(5):
+            try:
+                # Here we try to detach all databases in async mode. If some queries are really hung, then the detach will hang too.
+                # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds).
+                #
+                # Also specify max_untracked_memory to allow 1GiB of memory to overcommit.
+                databases = (
+                    check_output(
+                        make_query_command("SHOW DATABASES"), shell=True, timeout=30
+                    )
+                    .decode("utf-8")
+                    .strip()
+                    .split()
+                )
+                for db in databases:
+                    if db == "system":
+                        continue
+                    command = make_query_command(f"DETACH DATABASE {db}")
+                    # we don't wait for the detach to finish
+                    Popen(command, shell=True)
+                break
+            except Exception as ex:
+                logging.error(
+                    "Failed to SHOW or DETACH databases, will retry %s", str(ex)
+                )
+                time.sleep(i)
+        else:
+            raise Exception(
+                "Cannot detach databases after stress tests. Probably server consumed too much memory and cannot execute simple queries"
+            )
+
+    # Wait for last queries to finish if any, not longer than 300 seconds
+    call(
+        make_query_command(
+            """
+    select sleepEachRow((
+        select maxOrDefault(300 - elapsed) + 1
+        from system.processes
+        where query not like '%from system.processes%' and elapsed < 300
+    ) / 300)
+    from numbers(300)
+    format Null
+    """
+        ),
+        shell=True,
+        stderr=STDOUT,
+        timeout=330,
+    )
+
+    # Even if all clickhouse-test processes are finished, there are probably some sh scripts,
+    # which still run some new queries. Let's ignore them.
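+    # In other words: assume long-running queries exist unless the probe below
+    # positively confirms nothing has been running for more than 300 seconds.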
+    try:
+        query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """
+        output = (
+            check_output(query, shell=True, stderr=STDOUT, timeout=30)
+            .decode("utf-8")
+            .strip()
+        )
+        if int(output) == 0:
+            return False
+    except:
+        pass
+    return True
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
+    parser = argparse.ArgumentParser(
+        description="ClickHouse script for running stress test"
+    )
+    parser.add_argument("--test-cmd", default="/usr/bin/clickhouse-test")
+    parser.add_argument("--skip-func-tests", default="")
+    parser.add_argument("--client-cmd", default="clickhouse-client")
+    parser.add_argument("--server-log-folder", default="/var/log/clickhouse-server")
+    parser.add_argument("--output-folder")
+    parser.add_argument("--global-time-limit", type=int, default=1800)
+    parser.add_argument("--num-parallel", type=int, default=cpu_count())
+    parser.add_argument("--upgrade-check", action="store_true")
+    parser.add_argument("--hung-check", action="store_true", default=False)
+    # makes sense only for hung check
+    parser.add_argument("--drop-databases", action="store_true", default=False)
+
+    args = parser.parse_args()
+    if args.drop_databases and not args.hung_check:
+        raise Exception("--drop-databases only used in hung check (--hung-check)")
+    func_pipes = []
+    func_pipes = run_func_test(
+        args.test_cmd,
+        args.output_folder,
+        args.num_parallel,
+        args.skip_func_tests,
+        args.global_time_limit,
+        args.upgrade_check,
+    )
+
+    logging.info("Will wait for functional tests to finish")
+    while True:
+        retcodes = []
+        for p in func_pipes:
+            if p.poll() is not None:
+                retcodes.append(p.returncode)
+        if len(retcodes) == len(func_pipes):
+            break
+        logging.info("Finished %s of %s processes", len(retcodes), len(func_pipes))
+        time.sleep(5)
+
+    logging.info("All processes finished")
+
+    logging.info("Compressing stress logs")
+    compress_stress_logs(args.output_folder, "stress_test_run_")
+    logging.info("Logs compressed")
+
+    if args.hung_check:
+        try:
+            have_long_running_queries = prepare_for_hung_check(args.drop_databases)
+        except Exception as ex:
+            have_long_running_queries = True
+            logging.error("Failed to prepare for hung check %s", str(ex))
+        logging.info("Checking if some queries hung")
+        cmd = " ".join(
+            [
+                args.test_cmd,
+                # Do not track memory allocations up to 1Gi,
+                # this will allow to ignore server memory limit (max_server_memory_usage) for this query.
+                #
+                # NOTE: memory_profiler_step should be also adjusted, because:
+                #
+                #     untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
+                #
+                # NOTE: if there are queries with GROUP BY, this trick
+                # will not work due to CurrentMemoryTracker::check() from
+                # Aggregator code.
+                # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
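+                # Roughly, the assembled hung-check command is (illustrative):
+                #   /usr/bin/clickhouse-test --client-option max_untracked_memory=1Gi
+                #     max_memory_usage_for_user=0 memory_profiler_step=1Gi
+                #     --database=system --hung-check --stress 00001_select_1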
+ "--client-option", + "max_untracked_memory=1Gi", + "max_memory_usage_for_user=0", + "memory_profiler_step=1Gi", + # Use system database to avoid CREATE/DROP DATABASE queries + "--database=system", + "--hung-check", + "--stress", + "00001_select_1", + ] + ) + res = call(cmd, shell=True, stderr=STDOUT) + hung_check_status = "No queries hung\tOK\n" + if res != 0 and have_long_running_queries: + logging.info("Hung check failed with exit code {}".format(res)) + hung_check_status = "Hung check failed\tFAIL\n" + with open( + os.path.join(args.output_folder, "test_results.tsv"), "w+" + ) as results: + results.write(hung_check_status) + + logging.info("Stress test finished") diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 37277538867..1437d50f9c5 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -34,7 +34,6 @@ def get_run_command( "docker run --cap-add=SYS_PTRACE " # a static link, don't use S3_URL or S3_DOWNLOAD "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " - f"-e DISABLE_BC_CHECK={os.environ.get('DISABLE_BC_CHECK', '0')} " # For dmesg and sysctl "--privileged " f"--volume={build_path}:/package_folder " @@ -100,7 +99,7 @@ def process_results( return state, description, test_results, additional_files -if __name__ == "__main__": +def run_stress_test(docker_image_name): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -123,7 +122,7 @@ if __name__ == "__main__": logging.info("Check is already finished according to github status, exiting") sys.exit(0) - docker_image = get_image_with_version(reports_path, "clickhouse/stress-test") + docker_image = get_image_with_version(reports_path, docker_image_name) packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): @@ -187,3 +186,6 @@ if __name__ == "__main__": if state == "error": sys.exit(1) + +if __name__ == "__main__": + run_stress_test("clickhouse/stress-test") diff --git a/tests/ci/upgrade_check.py b/tests/ci/upgrade_check.py new file mode 100644 index 00000000000..83b6f9e299f --- /dev/null +++ b/tests/ci/upgrade_check.py @@ -0,0 +1,4 @@ +import stress_check + +if __name__ == "__main__": + stress_check.run_stress_test("clickhouse/upgrade-check") diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 13669981daa..8d80bc58f7d 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -404,7 +404,7 @@ class FailureReason(enum.Enum): S3_STORAGE = "s3-storage" STRESS = "stress" BUILD = "not running for current build" - BACKWARD_INCOMPATIBLE = "test is backward incompatible" + NO_UPGRADE_CHECK = "not running for upgrade check" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -650,35 +650,6 @@ class TestCase: else "" ) - # Check if test contains tag "no-backward-compatibility-check" and we should skip it - def check_backward_incompatible_tag(self) -> bool: - for tag in self.tags: - if tag.startswith("no-backward-compatibility-check"): - split = tag.split(":") - - # If version is not specified in tag, always skip this test. - if len(split) == 1: - return True - version_from_tag = split[1] - - # Check if extracted string from tag is a real ClickHouse version, if not - always skip test. - if re.match(VERSION_PATTERN, version_from_tag) is None: - return True - - server_version = str( - clickhouse_execute(args, "SELECT version()").decode() - ) - # If server version is less or equal from the version specified in tag, we should skip this test. 
-                version_from_tag_split = list(map(int, version_from_tag.split(".")))
-                server_version_split = list(map(int, server_version.split(".")))
-                if (
-                    server_version_split[: len(version_from_tag_split)]
-                    <= version_from_tag_split
-                ):
-                    return True
-
-        return False
-
     # should skip test, should increment skipped_total, skip reason
     def should_skip_test(self, suite) -> Optional[FailureReason]:
         tags = self.tags
@@ -726,10 +697,9 @@ class TestCase:
         elif tags and ("no-replicated-database" in tags) and args.replicated_database:
             return FailureReason.REPLICATED_DB
 
-        elif (
-            args.backward_compatibility_check and self.check_backward_incompatible_tag()
-        ):
-            return FailureReason.BACKWARD_INCOMPATIBLE
+        # TODO: remove checking "no-upgrade-check" after 23.1
+        elif args.upgrade_check and "no-upgrade-check" in tags:
+            return FailureReason.NO_UPGRADE_CHECK
 
         elif tags and ("no-s3-storage" in tags) and args.s3_storage:
             return FailureReason.S3_STORAGE
@@ -2212,9 +2183,9 @@ if __name__ == "__main__":
     )
 
     group.add_argument(
-        "--backward-compatibility-check",
+        "--upgrade-check",
         action="store_true",
-        help="Run tests for further backward compatibility testing by ignoring all"
+        help="Run tests for further server upgrade testing by ignoring all "
         "drop queries in tests for collecting data from new version of server",
     )
     parser.add_argument(
@@ -2332,7 +2303,7 @@ if __name__ == "__main__":
     else:
         args.client_database = "default"
 
-    if args.backward_compatibility_check:
+    if args.upgrade_check:
         args.client += " --fake-drop"
 
     if args.client_option or args.secure:
diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.sql b/tests/queries/0_stateless/00061_merge_tree_alter.sql
index ee5694518d9..2e46b1e16d6 100644
--- a/tests/queries/0_stateless/00061_merge_tree_alter.sql
+++ b/tests/queries/0_stateless/00061_merge_tree_alter.sql
@@ -1,4 +1,4 @@
--- Tags: no-backward-compatibility-check
+-- Tags: no-upgrade-check
 
 DROP TABLE IF EXISTS alter_00061;
 set allow_deprecated_syntax_for_merge_tree=1;
diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh
index 1aa02864815..8b07d9abe35 100755
--- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh
+++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: zookeeper, no-parallel, no-s3-storage, no-backward-compatibility-check
+# Tags: zookeeper, no-parallel, no-s3-storage, no-upgrade-check
 
 # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem
 # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously)
diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql
index e8d923389e5..9c02ac795ed 100644
--- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql
+++ b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql
@@ -1,4 +1,4 @@
--- Tags: long, zookeeper, no-replicated-database, no-backward-compatibility-check
+-- Tags: long, zookeeper, no-replicated-database, no-upgrade-check
 -- Tag no-replicated-database: Fails due to additional replicas or shards
 
 SET send_logs_level = 'fatal';
diff --git a/tests/queries/0_stateless/00942_dataparts_500.sh
b/tests/queries/0_stateless/00942_dataparts_500.sh index 7e1a7f15810..a6c3fcd4303 100755 --- a/tests/queries/0_stateless/00942_dataparts_500.sh +++ b/tests/queries/0_stateless/00942_dataparts_500.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check # Test fix for issue #5066 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 5ccef802c0c..f143c97bdf4 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-backward-compatibility-check +# Tags: race, zookeeper, no-parallel, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index 1f316b4b389..aec27792603 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest, no-backward-compatibility-check +# Tags: zookeeper, no-parallel, no-fasttest, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index bbe3a5a51c0..8c9efb75e96 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-backward-compatibility-check +# Tags: race, zookeeper, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index ed9bc8af61b..8074e84f0ed 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-backward-compatibility-check +-- Tags: no-parallel, no-upgrade-check DROP DATABASE IF EXISTS test_01191; CREATE DATABASE test_01191 ENGINE=Atomic; diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index f7615974237..f9a2ec8a34c 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-backward-compatibility-check +# Tags: long, zookeeper, no-parallel, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql index 8717c93b468..43c9fa43104 100644 --- a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql +++ 
b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-backward-compatibility-check +-- Tags: zookeeper, no-upgrade-check DROP TABLE IF EXISTS table_rename_with_ttl; diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 13ebd080621..5321e03767f 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-backward-compatibility-check +-- Tags: no-parallel, no-upgrade-check DROP DATABASE IF EXISTS db_01391; CREATE DATABASE db_01391; diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index bdcde1adbad..d2ae05a5f80 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check -- force data path with the user/pass in it set use_compact_format_in_distributed_parts_names=0; diff --git a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh index 27de10ab16a..4a9b4beee5b 100755 --- a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh +++ b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql index 1dae8e7b383..b45a1974611 100644 --- a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql +++ b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-backward-compatibility-check +-- Tags: long, no-upgrade-check DROP TABLE IF EXISTS test_01640; DROP TABLE IF EXISTS restore_01640; diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.sql b/tests/queries/0_stateless/01780_column_sparse_alter.sql index 925b81ea2c2..bc2f6f7c91f 100644 --- a/tests/queries/0_stateless/01780_column_sparse_alter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_alter.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET mutations_sync = 2; diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index 2f43423e13e..3abf5c52031 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index e4041b2d755..e0f0114d030 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check set -eu diff --git 
a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index c4e69f68a5d..12afdcd4421 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check, no-fasttest +-- Tags: no-upgrade-check, no-fasttest DROP TABLE IF EXISTS partslost_0; DROP TABLE IF EXISTS partslost_1; diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 261c389c9f2..26646bd91a0 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-backward-compatibility-check +# Tags: no-fasttest, no-parallel, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh index 8c28cdb57bd..f22b7796bd4 100755 --- a/tests/queries/0_stateless/02242_delete_user_race.sh +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-backward-compatibility-check +# Tags: race, no-fasttest, no-parallel, no-upgrade-check # Test tries to reproduce a race between threads: # - deletes user diff --git a/tests/queries/0_stateless/02243_drop_user_grant_race.sh b/tests/queries/0_stateless/02243_drop_user_grant_race.sh index d36db47e562..e36be96aa02 100755 --- a/tests/queries/0_stateless/02243_drop_user_grant_race.sh +++ b/tests/queries/0_stateless/02243_drop_user_grant_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-backward-compatibility-check +# Tags: race, no-fasttest, no-parallel, no-upgrade-check set -e diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql index 9938154f169..06af0b5e1d8 100644 --- a/tests/queries/0_stateless/02293_hashid.sql +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET allow_experimental_hash_functions = 1; select number, hashid(number) from system.numbers limit 5; diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql index 469476c82bf..8e0fb4a55a0 100644 --- a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET max_bytes_in_join = '100', join_algorithm = 'auto'; diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql index 5bc0c41b3ee..f73525f92be 100644 --- a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -1,2 +1,2 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) diff --git a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql index 44a4797ae3c..08e8843f763 100644 --- 
a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql +++ b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check -- https://github.com/ClickHouse/ClickHouse/issues/37561 diff --git a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql index 47b47101a79..df20e5c42d4 100644 --- a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql +++ b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET join_algorithm = 'partial_merge'; diff --git a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql index 35f210be43d..cac7992e305 100644 --- a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql +++ b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql @@ -1,6 +1,3 @@ --- Tags: no-backward-compatibility-check --- TODO: remove no-backward-compatibility-check after new 22.6 release - SET cast_ipv4_ipv6_default_on_conversion_error = 1; DROP TABLE IF EXISTS ipv4_test; diff --git a/tests/queries/0_stateless/02316_const_string_intersact.sql b/tests/queries/0_stateless/02316_const_string_intersact.sql index 18af398aa5d..148d048952b 100644 --- a/tests/queries/0_stateless/02316_const_string_intersact.sql +++ b/tests/queries/0_stateless/02316_const_string_intersact.sql @@ -1,3 +1,3 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SELECT 'Play ClickHouse' InterSect SELECT 'Play ClickHouse' diff --git a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql index 08651590c76..734c597051e 100644 --- a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql +++ b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check select arrayMap(x -> toNullable(1), range(number)) from numbers(3); select arrayFilter(x -> toNullable(1), range(number)) from numbers(3); diff --git a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh index 653cb25172a..503b94be715 100755 --- a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh +++ b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index eb395e5ec41..07f705acd84 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. 
SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; diff --git a/tests/queries/0_stateless/02354_annoy.sh b/tests/queries/0_stateless/02354_annoy.sh index 526886ec68d..670b31dc2a4 100755 --- a/tests/queries/0_stateless/02354_annoy.sh +++ b/tests/queries/0_stateless/02354_annoy.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-backward-compatibility-check +# Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02363_mapupdate_improve.sql b/tests/queries/0_stateless/02363_mapupdate_improve.sql index 6b7723cc9b4..b4a4b8e5d91 100644 --- a/tests/queries/0_stateless/02363_mapupdate_improve.sql +++ b/tests/queries/0_stateless/02363_mapupdate_improve.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check DROP TABLE IF EXISTS map_test; CREATE TABLE map_test(`tags` Map(String, String)) ENGINE = MergeTree PRIMARY KEY tags ORDER BY tags SETTINGS index_granularity = 8192; INSERT INTO map_test (tags) VALUES (map('fruit','apple','color','red')); diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql index 9d6950051f0..cf9f2971cb0 100644 --- a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql +++ b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check DROP TABLE IF EXISTS test_table; CREATE TABLE test_table diff --git a/tests/queries/0_stateless/02366_with_fill_date.sql b/tests/queries/0_stateless/02366_with_fill_date.sql index 64e23b845f8..4d41facf423 100644 --- a/tests/queries/0_stateless/02366_with_fill_date.sql +++ b/tests/queries/0_stateless/02366_with_fill_date.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SELECT toDate('2022-02-01') AS d1 FROM numbers(18) AS number diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 0f1b4f638cb..327a09cd96f 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check drop table if exists test_02381; create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false; diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh index 2372d30497e..548179b94c9 100755 --- a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-backward-compatibility-check, disabled +# Tags: race, zookeeper, no-parallel, no-upgrade-check, disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02429_low_cardinality_trash.sh b/tests/queries/0_stateless/02429_low_cardinality_trash.sh index 258f02b4bb6..91618cb2796 100755 --- 
a/tests/queries/0_stateless/02429_low_cardinality_trash.sh +++ b/tests/queries/0_stateless/02429_low_cardinality_trash.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-backward-compatibility-check +# Tags: long, no-upgrade-check CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 11ca3f43d8f..abcf1bf4c5b 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-backward-compatibility-check +# Tags: long, no-upgrade-check CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From c7366f7906e28c4731234e15e1999a2011fe2e6c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 3 Jan 2023 19:32:11 +0000 Subject: [PATCH 03/83] Automatic style fix --- tests/ci/stress.py | 8 +++----- tests/ci/stress_check.py | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index ae35afbc5fa..a1ad1a3f1c5 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -38,7 +38,7 @@ def get_options(i, upgrade_check): client_options.append("join_algorithm='full_sorting_merge'") if join_alg_num % 4 == 3: client_options.append("join_algorithm='auto'") - client_options.append('max_rows_in_join=1000') + client_options.append("max_rows_in_join=1000") if i == 13: client_options.append("memory_tracker_fault_probability=0.001") @@ -60,9 +60,7 @@ def run_func_test( global_time_limit, upgrade_check, ): - upgrade_check_option = ( - "--upgrade-check" if upgrade_check else "" - ) + upgrade_check_option = "--upgrade-check" if upgrade_check else "" global_time_limit_option = "" if global_time_limit: global_time_limit_option = "--global_time_limit={}".format(global_time_limit) @@ -165,7 +163,7 @@ def prepare_for_hung_check(drop_databases): for db in databases: if db == "system": continue - command = make_query_command(f'DETACH DATABASE {db}') + command = make_query_command(f"DETACH DATABASE {db}") # we don't wait for drop Popen(command, shell=True) break diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 1437d50f9c5..66905c60569 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -187,5 +187,6 @@ def run_stress_test(docker_image_name): if state == "error": sys.exit(1) + if __name__ == "__main__": run_stress_test("clickhouse/stress-test") From de4aca9c6b7a61e8152f9c3a00747f2a3f7a1442 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 3 Jan 2023 20:06:43 +0000 Subject: [PATCH 04/83] Update workflows --- .github/workflows/pull_request.yml | 140 ++++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index efb7d50dd28..9bdd6a44b21 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -2897,10 +2897,10 @@ jobs: - name: Set envs run: | cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/stress_thread + TEMP_PATH=${{runner.temp}}/stress_asan REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Stress test (asan) - REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse + REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -3059,6 +3059,142 @@ 
jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + ############################################################################################## + ######################################### UPGRADE CHECK ###################################### + ############################################################################################## + UpgradeCheckAsan: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (asan) + REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + UpgradeCheckTsan: + needs: [BuilderDebTsan] + # same as for stress test with tsan + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_thread + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (tsan) + REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + UpgradeCheckMsan: + needs: [BuilderDebMsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (msan) + REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + UpgradeCheckDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + 
CHECK_NAME=Upgrade check (debug) + REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ##################################### AST FUZZERS ############################################ ############################################################################################## From 9fb8b42d376fbfd996e12cb927e0f07143a6a94b Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 3 Jan 2023 20:13:12 +0000 Subject: [PATCH 05/83] Fix style --- tests/ci/stress.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index ae35afbc5fa..337e127cfe6 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -11,7 +11,7 @@ import time def get_options(i, upgrade_check): options = [] client_options = [] - if 0 < i: + if i > 0: options.append("--order=random") if i % 3 == 2 and not upgrade_check: @@ -72,8 +72,8 @@ def run_func_test( for i in range(num_processes) ] pipes = [] - for i in range(0, len(output_paths)): - f = open(output_paths[i], "w") + for i, path in enumerate(output_paths): + f = open(path, "w") full_command = "{} {} {} {} {} --stress".format( cmd, get_options(i, upgrade_check), @@ -295,7 +295,7 @@ if __name__ == "__main__": res = call(cmd, shell=True, stderr=STDOUT) hung_check_status = "No queries hung\tOK\n" if res != 0 and have_long_running_queries: - logging.info("Hung check failed with exit code {}".format(res)) + logging.info("Hung check failed with exit code %d", res) hung_check_status = "Hung check failed\tFAIL\n" with open( os.path.join(args.output_folder, "test_results.tsv"), "w+" From 399c9aa2354c04ff1b30c60357be26a52d3db0a6 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 4 Jan 2023 00:04:38 +0000 Subject: [PATCH 06/83] Update images.json --- docker/images.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/images.json b/docker/images.json index 8339205b52f..634c2b0c22c 100644 --- a/docker/images.json +++ b/docker/images.json @@ -55,6 +55,10 @@ "name": "clickhouse/stress-test", "dependent": [] }, + "docker/test/upgrade": { + "name": "clickhouse/upgrade-check", + "dependent": [] + }, "docker/test/split_build_smoke_test": { "name": "clickhouse/split-build-smoke-test", "dependent": [] From f1ca7e54d567ca9e229a041ff152a66b722c3715 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 4 Jan 2023 15:06:16 +0000 Subject: [PATCH 07/83] Make better --- docker/test/ci | 305 --------------------------------- docker/test/stress/Dockerfile | 3 +- docker/test/upgrade/Dockerfile | 3 +- docker/test/upgrade/run.sh | 1 + 4 files changed, 5 insertions(+), 307 deletions(-) delete mode 100755 docker/test/ci diff --git a/docker/test/ci b/docker/test/ci deleted file mode 100755 index d1860e9e14b..00000000000 --- a/docker/test/ci +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -from multiprocessing import cpu_count 
-from subprocess import Popen, call, check_output, STDOUT -import os -import argparse -import logging -import time - - -def get_options(i, backward_compatibility_check): - options = [] - client_options = [] - if 0 < i: - options.append("--order=random") - - if i % 3 == 2 and not backward_compatibility_check: - options.append( - '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) - ) - client_options.append("allow_experimental_database_replicated=1") - - # If database name is not specified, new database is created for each functional test. - # Run some threads with one database for all tests. - if i % 2 == 1: - options.append(" --database=test_{}".format(i)) - - if i % 3 == 1: - client_options.append("join_use_nulls=1") - - if i % 2 == 1: - join_alg_num = i // 2 - if join_alg_num % 4 == 0: - client_options.append("join_algorithm='parallel_hash'") - if join_alg_num % 4 == 1: - client_options.append("join_algorithm='partial_merge'") - if join_alg_num % 4 == 2: - client_options.append("join_algorithm='full_sorting_merge'") - if join_alg_num % 4 == 3: - client_options.append("join_algorithm='auto'") - client_options.append('max_rows_in_join=1000') - - if i == 13: - client_options.append("memory_tracker_fault_probability=0.001") - - if i % 2 == 1 and not backward_compatibility_check: - client_options.append("group_by_use_nulls=1") - - if client_options: - options.append(" --client-option " + " ".join(client_options)) - - return " ".join(options) - - -def run_func_test( - cmd, - output_prefix, - num_processes, - skip_tests_option, - global_time_limit, - backward_compatibility_check, -): - backward_compatibility_check_option = ( - "--backward-compatibility-check" if backward_compatibility_check else "" - ) - global_time_limit_option = "" - if global_time_limit: - global_time_limit_option = "--global_time_limit={}".format(global_time_limit) - - output_paths = [ - os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) - for i in range(num_processes) - ] - pipes = [] - for i in range(0, len(output_paths)): - f = open(output_paths[i], "w") - full_command = "{} {} {} {} {} --stress".format( - cmd, - get_options(i, backward_compatibility_check), - global_time_limit_option, - skip_tests_option, - backward_compatibility_check_option, - ) - logging.info("Run func tests '%s'", full_command) - p = Popen(full_command, shell=True, stdout=f, stderr=f) - pipes.append(p) - time.sleep(0.5) - return pipes - - -def compress_stress_logs(output_path, files_prefix): - cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*" - check_output(cmd, shell=True) - - -def call_with_retry(query, timeout=30, retry_count=5): - for i in range(retry_count): - code = call(query, shell=True, stderr=STDOUT, timeout=timeout) - if code != 0: - time.sleep(i) - else: - break - - -def make_query_command(query): - return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0""" - - -def prepare_for_hung_check(drop_databases): - # FIXME this function should not exist, but... - - # We attach gdb to clickhouse-server before running tests - # to print stacktraces of all crashes even if clickhouse cannot print it for some reason. - # However, it obstruct checking for hung queries. 
- logging.info("Will terminate gdb (if any)") - call_with_retry("kill -TERM $(pidof gdb)") - - # ThreadFuzzer significantly slows down server and causes false-positive hung check failures - call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") - - call_with_retry(make_query_command("SELECT 1 FORMAT Null")) - - # Some tests execute SYSTEM STOP MERGES or similar queries. - # It may cause some ALTERs to hang. - # Possibly we should fix tests and forbid to use such queries without specifying table. - call_with_retry(make_query_command("SYSTEM START MERGES")) - call_with_retry(make_query_command("SYSTEM START DISTRIBUTED SENDS")) - call_with_retry(make_query_command("SYSTEM START TTL MERGES")) - call_with_retry(make_query_command("SYSTEM START MOVES")) - call_with_retry(make_query_command("SYSTEM START FETCHES")) - call_with_retry(make_query_command("SYSTEM START REPLICATED SENDS")) - call_with_retry(make_query_command("SYSTEM START REPLICATION QUEUES")) - call_with_retry(make_query_command("SYSTEM DROP MARK CACHE")) - - # Issue #21004, live views are experimental, so let's just suppress it - call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'")) - - # Kill other queries which known to be slow - # It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds - call_with_retry( - make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'") - ) - # Long query from 00084_external_agregation - call_with_retry( - make_query_command( - "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" - ) - ) - - if drop_databases: - for i in range(5): - try: - # Here we try to drop all databases in async mode. If some queries really hung, than drop will hung too. - # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds). - # - # Also specify max_untracked_memory to allow 1GiB of memory to overcommit. - databases = ( - check_output( - make_query_command("SHOW DATABASES"), shell=True, timeout=30 - ) - .decode("utf-8") - .strip() - .split() - ) - for db in databases: - if db == "system": - continue - command = make_query_command(f'DETACH DATABASE {db}') - # we don't wait for drop - Popen(command, shell=True) - break - except Exception as ex: - logging.error( - "Failed to SHOW or DROP databasese, will retry %s", str(ex) - ) - time.sleep(i) - else: - raise Exception( - "Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries" - ) - - # Wait for last queries to finish if any, not longer than 300 seconds - call( - make_query_command( - """ - select sleepEachRow(( - select maxOrDefault(300 - elapsed) + 1 - from system.processes - where query not like '%from system.processes%' and elapsed < 300 - ) / 300) - from numbers(300) - format Null - """ - ), - shell=True, - stderr=STDOUT, - timeout=330, - ) - - # Even if all clickhouse-test processes are finished, there are probably some sh scripts, - # which still run some new queries. Let's ignore them. 
- try: - query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """ - output = ( - check_output(query, shell=True, stderr=STDOUT, timeout=30) - .decode("utf-8") - .strip() - ) - if int(output) == 0: - return False - except: - pass - return True - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - parser = argparse.ArgumentParser( - description="ClickHouse script for running stresstest" - ) - parser.add_argument("--test-cmd", default="/usr/bin/clickhouse-test") - parser.add_argument("--skip-func-tests", default="") - parser.add_argument("--client-cmd", default="clickhouse-client") - parser.add_argument("--server-log-folder", default="/var/log/clickhouse-server") - parser.add_argument("--output-folder") - parser.add_argument("--global-time-limit", type=int, default=1800) - parser.add_argument("--num-parallel", type=int, default=cpu_count()) - parser.add_argument("--backward-compatibility-check", action="store_true") - parser.add_argument("--hung-check", action="store_true", default=False) - # make sense only for hung check - parser.add_argument("--drop-databases", action="store_true", default=False) - - args = parser.parse_args() - if args.drop_databases and not args.hung_check: - raise Exception("--drop-databases only used in hung check (--hung-check)") - func_pipes = [] - func_pipes = run_func_test( - args.test_cmd, - args.output_folder, - args.num_parallel, - args.skip_func_tests, - args.global_time_limit, - args.backward_compatibility_check, - ) - - logging.info("Will wait functests to finish") - while True: - retcodes = [] - for p in func_pipes: - if p.poll() is not None: - retcodes.append(p.returncode) - if len(retcodes) == len(func_pipes): - break - logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes)) - time.sleep(5) - - logging.info("All processes finished") - - logging.info("Compressing stress logs") - compress_stress_logs(args.output_folder, "stress_test_run_") - logging.info("Logs compressed") - - if args.hung_check: - try: - have_long_running_queries = prepare_for_hung_check(args.drop_databases) - except Exception as ex: - have_long_running_queries = True - logging.error("Failed to prepare for hung check %s", str(ex)) - logging.info("Checking if some queries hung") - cmd = " ".join( - [ - args.test_cmd, - # Do not track memory allocations up to 1Gi, - # this will allow to ignore server memory limit (max_server_memory_usage) for this query. - # - # NOTE: memory_profiler_step should be also adjusted, because: - # - # untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step) - # - # NOTE: that if there will be queries with GROUP BY, this trick - # will not work due to CurrentMemoryTracker::check() from - # Aggregator code. - # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. 
- "--client-option", - "max_untracked_memory=1Gi", - "max_memory_usage_for_user=0", - "memory_profiler_step=1Gi", - # Use system database to avoid CREATE/DROP DATABASE queries - "--database=system", - "--hung-check", - "--stress", - "00001_select_1", - ] - ) - res = call(cmd, shell=True, stderr=STDOUT) - hung_check_status = "No queries hung\tOK\n" - if res != 0 and have_long_running_queries: - logging.info("Hung check failed with exit code {}".format(res)) - hung_check_status = "Hung check failed\tFAIL\n" - with open( - os.path.join(args.output_folder, "test_results.tsv"), "w+" - ) as results: - results.write(hung_check_status) - - logging.info("Stress test finished") diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 2778b63774d..1cabea58a65 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -22,7 +22,8 @@ RUN apt-get update -y \ netcat-openbsd \ telnet \ llvm-9 \ - brotli + brotli \ + && apt-get clean COPY run.sh / diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile index c98220b3403..a91088fb01e 100644 --- a/docker/test/upgrade/Dockerfile +++ b/docker/test/upgrade/Dockerfile @@ -22,7 +22,8 @@ RUN apt-get update -y \ netcat-openbsd \ telnet \ llvm-9 \ - brotli + brotli \ + && apt-get clean COPY run.sh / diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 1a107b6df2a..c0a7f279588 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -30,6 +30,7 @@ function configure() /usr/share/clickhouse-test/config/install.sh # we mount tests folder from repo to /usr/share + ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag From fd9b735505ce7bcd090e5a6edada8314c4ea05b1 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 Jan 2023 20:22:03 +0000 Subject: [PATCH 08/83] Merge with master, add some fixes --- docker/test/stress/run.sh | 2 +- docker/test/upgrade/run.sh | 62 +++++++++++++++++++++----------------- tests/ci/ci_config.py | 12 ++++++++ 3 files changed, 47 insertions(+), 29 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 01da3e014e2..5fc0705412f 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -362,7 +362,7 @@ rg -Fa " received signal " /test_output/gdb.log > /dev/null \ for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: done tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index c0a7f279588..9f6ead84de9 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -53,9 +53,11 @@ function configure() echo "1" \ > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml + local total_mem total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB total_mem=$(( total_mem*1024 )) # bytes + # Set maximum memory usage as half of total memory (less chance of OOM). 
+    #
+    # But not via max_server_memory_usage but via max_memory_usage_for_user,
@@ -68,21 +70,23 @@ function configure()
     # max_server_memory_usage will be hard limit, and queries that should be
     # executed regardless memory limits will use max_memory_usage_for_user=0,
     # instead of relying on max_untracked_memory
-    local max_server_mem
-    max_server_mem=$((total_mem*75/100)) # 75%
-    echo "Setting max_server_memory_usage=$max_server_mem"
+
+    max_server_memory_usage_to_ram_ratio=0.5
+    echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
     cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
 <clickhouse>
-    <max_server_memory_usage>${max_server_mem}</max_server_memory_usage>
+    <max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
 </clickhouse>
 EOL
+
     local max_users_mem
-    max_users_mem=$((total_mem*50/100)) # 50%
-    echo "Setting max_memory_usage_for_user=$max_users_mem"
+    max_users_mem=$((total_mem*30/100)) # 30%
+    echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
     cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
 <clickhouse>
     <profiles>
         <default>
+            <max_memory_usage>10G</max_memory_usage>
             <max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
         </default>
     </profiles>
@@ -100,6 +104,13 @@
     -->
     <core_path>$PWD</core_path>
 </clickhouse>
 EOL
+
+    # Let OOM killer terminate other processes before clickhouse-server:
+    cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
+<clickhouse>
+    <oom_score>-1000</oom_score>
+</clickhouse>
+EOL
 
     # Analyzer is not yet ready for testing
@@ -121,18 +132,12 @@
 
 function stop()
 {
+    local max_tries="${1:-90}"
     local pid
     # Preserve the pid, since the server can hung after the PID will be deleted.
     pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
 
-    clickhouse stop $max_tries --do-not-kill && return
-
-    if [ -n "$1" ]
-    then
-        # temporarily disable it in BC check
-        clickhouse stop --force
-        return
-    fi
+    clickhouse stop --max-tries "$max_tries" --do-not-kill && return
 
     # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
     kill -TERM "$(pidof gdb)" ||:
@@ -322,7 +327,8 @@ else
         clickhouse stop --force
     )
 
-    stop 1
+    # Use bigger timeout for previous version
+    stop 300
     mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
 
     # Start new server
@@ -334,7 +340,7 @@ else
     start 500
     clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
         || (echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv \
-        && grep -a "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/application_errors.txt)
+        && rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/application_errors.txt)
 
     # Remove file application_errors.txt if it's empty
     [ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt
@@ -357,7 +363,7 @@ else
     # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part")
     # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
     echo "Check for Error messages in server log:"
-    zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
+    rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
            -e "Code: 236. DB::Exception: Cancelled mutating parts" \
            -e "REPLICA_IS_ALREADY_ACTIVE" \
            -e "REPLICA_ALREADY_EXISTS" \
@@ -401,21 +407,21 @@ else
     [ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
 
     # Sanitizer asserts
-    zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
-    zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
-    zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
+    rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
+    rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
+    rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
         && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
         || echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
     rm -f /test_output/tmp
 
     # OOM
-    zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \
+    rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \
        && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
        || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
 
     # Logical errors
     echo "Check for Logical errors in server log:"
-    zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/logical_errors.txt \
+    rg -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/logical_errors.txt \
        && echo -e 'Logical error thrown (see server logs or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
        || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
@@ -423,13 +429,13 @@ else
     [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
 
     # Crash
-    zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \
+    rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \
        && echo -e 'Killed by signal (in server logs)\tFAIL' >> /test_output/test_results.tsv \
        || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
 
     # It also checks for crash without stacktrace (printed by watchdog)
     echo "Check for Fatal message in server log:"
-    zgrep -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/fatal_messages.txt \
+    rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/fatal_messages.txt \
        && echo -e 'Fatal message in server logs (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
        || echo -e 'No fatal messages in server logs\tOK' >> /test_output/test_results.tsv
@@ -439,7 +445,7 @@ else
     tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
     for table in query_log trace_log
    do
-        clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||:
+        clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
    done
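+    # For reference (an added note, not executed by CI): one of these dumps can be
+    # inspected locally with, e.g.:
+    #   zstd -dc query_log.backward.tsv.zst | head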
fi @@ -457,7 +463,7 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # Core dumps -for core in core.*; do - pigz $core - mv $core.gz /test_output/ +find . -type f -maxdepth 1 -name 'core.*' | while read core; do + zstd --threads=0 $core + mv $core.zst /test_output/ done diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index c77acfb679f..11a9a24d8a4 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -263,6 +263,18 @@ CI_CONFIG = { "Stress test (debug)": { "required_build": "package_debug", }, + "Upgrade check (asan)": { + "required_build": "package_asan", + }, + "Upgrade check (tsan)": { + "required_build": "package_tsan", + }, + "Upgrade check (msan)": { + "required_build": "package_msan", + }, + "Upgrade check (debug)": { + "required_build": "package_debug", + }, "Integration tests (asan)": { "required_build": "package_asan", }, From 09951c70c4194b1e9265c2757a159a9e84068581 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 24 Jan 2023 16:45:07 +0000 Subject: [PATCH 09/83] Fix --- docker/test/upgrade/run.sh | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 9f6ead84de9..503744e8baf 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -29,12 +29,6 @@ function configure() export EXPORT_S3_STORAGE_POLICIES=1 /usr/share/clickhouse-test/config/install.sh - # we mount tests folder from repo to /usr/share - ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress - ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test - ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages - ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag - # avoid too slow startup sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|100000|10000|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml @@ -241,6 +235,12 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +# we mount tests folder from repo to /usr/share +ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress +ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages +ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag + echo "Get previous release tag" previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | get_previous_release_tag) echo $previous_release_tag @@ -294,8 +294,7 @@ else clickhouse-client --query="SELECT 'Server version: ', version()" - # Install new package before running stress test because we should use new - # clickhouse-client and new clickhouse-test. + # Install new package before running stress test because we should use new clickhouse-client. 
# # But we should leave old binary in /usr/bin/ and debug symbols in # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it @@ -458,6 +457,14 @@ grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e mv /var/log/clickhouse-server/stderr.log /test_output/ +# If we failed to clone repo or download previous release packages, +# we don't have any packages installed, but we need clickhouse-local +# to be installed to create check_status.tsv. +if ! command -v clickhouse-local &> /dev/null +then + install_packages package_folder +fi + # Write check result into check_status.tsv clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv From 61d836b79d32ac9aa1cd0970ef0c889a534bf113 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 25 Jan 2023 18:59:50 +0100 Subject: [PATCH 10/83] Update run.sh --- docker/test/upgrade/run.sh | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 503744e8baf..95ed588e054 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -242,7 +242,7 @@ ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/downlo ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag echo "Get previous release tag" -previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | get_previous_release_tag) +previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag) echo $previous_release_tag echo "Clone previous release repository" @@ -293,24 +293,7 @@ else start clickhouse-client --query="SELECT 'Server version: ', version()" - - # Install new package before running stress test because we should use new clickhouse-client. - # - # But we should leave old binary in /usr/bin/ and debug symbols in - # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it - # will print sane stacktraces and also to avoid possible crashes. - # - # FIXME: those files can be extracted directly from debian package, but - # actually better solution will be to use different PATH instead of playing - # games with files from packages. 
- mv /usr/bin/clickhouse previous_release_package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ - install_packages package_folder - mv /usr/bin/clickhouse package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ - mv previous_release_package_folder/clickhouse /usr/bin/ - mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - + mkdir tmp_stress_output stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \ @@ -330,9 +313,8 @@ else stop 300 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log - # Start new server - mv package_folder/clickhouse /usr/bin/ - mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + # Install and start new server + install_packages package_folder # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). export ZOOKEEPER_FAULT_INJECTION=0 configure From dc0c1c33bb6364e0caafed2cc6938c8f7c1d1a13 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 26 Jan 2023 14:50:59 +0000 Subject: [PATCH 11/83] Update --- docker/test/upgrade/run.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 95ed588e054..37357297d08 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -152,7 +152,7 @@ function start() echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv cat /var/log/clickhouse-server/stdout.log tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n1000 + tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n1000 break fi # use root to match with current uid @@ -229,12 +229,6 @@ fi azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
-mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-
 # we mount tests folder from repo to /usr/share
 ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
 ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
 ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
 ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
@@ -277,6 +271,12 @@ else
     install_packages previous_release_package_folder
 
     # Start server from previous release
     export ZOOKEEPER_FAULT_INJECTION=0
     configure
 
+    # But we still need default disk because some tables loaded only into it
+    sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>
default|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: rm -f /etc/clickhouse-server/users.d/marks.xml ||: From 81875fe5591ee37f45b6681e1ff46714d96304ec Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:45:10 +0100 Subject: [PATCH 12/83] Update run.sh --- docker/test/upgrade/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 37357297d08..89731940dd2 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -267,6 +267,8 @@ else install_packages previous_release_package_folder # Start server from previous release + # Let's enable S3 storage by default + export USE_S3_STORAGE_FOR_MERGE_TREE=1 # Previous version may not be ready for fault injections export ZOOKEEPER_FAULT_INJECTION=0 configure From 82521bb9734d4ecfbfdf5e4842826ea9228f01a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Feb 2023 13:33:49 +0000 Subject: [PATCH 13/83] Automatic style fix --- tests/ci/stress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index e21d34da5c0..2c77731a7b6 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -298,7 +298,7 @@ if __name__ == "__main__": ] ) hung_check_log = os.path.join(args.output_folder, "hung_check.log") - tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE) + tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) tee.stdin.close() if res != 0 and have_long_running_queries: From 5c62afd15e8054a66162b4be359248c867709f97 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 9 Feb 2023 15:01:32 +0100 Subject: [PATCH 14/83] Fix style --- tests/ci/stress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 2c77731a7b6..e62a37fb165 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -302,7 +302,7 @@ if __name__ == "__main__": res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) tee.stdin.close() if res != 0 and have_long_running_queries: - logging.info("Hung check failed with exit code {}".format(res)) + logging.info("Hung check failed with exit code %d", res) else: hung_check_status = "No queries hung\tOK\t\\N\t\n" with open( From 93dfbe6617b3b5d47ade4c2127026c1e5b2108c2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 9 Feb 2023 15:05:06 +0100 Subject: [PATCH 15/83] Fix mypy style --- tests/ci/stress.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index e62a37fb165..979806a87fd 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -300,7 +300,8 @@ if __name__ == "__main__": hung_check_log = os.path.join(args.output_folder, "hung_check.log") tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) - 
tee.stdin.close() + if tee.stdin is not None: + tee.stdin.close() if res != 0 and have_long_running_queries: logging.info("Hung check failed with exit code %d", res) else: From 4067453602b6a2c93550ad11d4e1ddefa887b622 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 10 Feb 2023 11:57:02 +0100 Subject: [PATCH 16/83] Update run.sh --- docker/test/upgrade/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index d5b29485afe..bbf24e96685 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -467,7 +467,7 @@ else for table in query_log trace_log do clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \ - | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||: + | zstd --threads=0 > /test_output/$table.tsv.zst ||: done fi From 43e660ebd2cdb18fadda2a1385c014135654c4d9 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 10 Feb 2023 18:28:19 +0100 Subject: [PATCH 17/83] Update run.sh --- docker/test/upgrade/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index bbf24e96685..e2392dd4438 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -416,6 +416,7 @@ else -e "Session expired" \ -e "TOO_MANY_PARTS" \ -e "Authentication failed" \ + -e "Container already exists" \ /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \ >> /test_output/test_results.tsv \ From 42ae0582d393c7795187ddf786ef07bcf8ac5fc1 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 20 Feb 2023 17:46:54 +0000 Subject: [PATCH 18/83] Try to use parquet v2 instead of v1 in output format --- src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 0fce98f8a11..375d3878b10 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -44,6 +44,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) auto sink = std::make_shared(out); parquet::WriterProperties::Builder builder; + builder.version(parquet::ParquetVersion::PARQUET_2_LATEST); #if USE_SNAPPY builder.compression(parquet::Compression::SNAPPY); #endif From ce2b04453245780406994bf9c0e25821878a7279 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 21 Feb 2023 03:29:14 +0000 Subject: [PATCH 19/83] analyzer - apply limit and offset settings --- src/Analyzer/QueryTreeBuilder.cpp | 11 ++++ src/Planner/Planner.cpp | 17 +++--- src/Storages/StorageView.cpp | 2 + .../02667_analyzer_limit_settings.reference | 54 +++++++++++++++++++ .../02667_analyzer_limit_settings.sql | 24 +++++++++ 5 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02667_analyzer_limit_settings.reference create mode 100644 tests/queries/0_stateless/02667_analyzer_limit_settings.sql diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 05b643aa6af..57b1ae1c994 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ 
b/src/Analyzer/QueryTreeBuilder.cpp @@ -233,6 +233,17 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q auto select_settings = select_query_typed.settings(); SettingsChanges settings_changes; + if (is_subquery) + { + if (const Settings & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) + { + Settings settings = updated_context->getSettings(); + settings.limit = 0; + settings.offset = 0; + updated_context->setSettings(settings); + } + } + if (select_settings) { auto & set_query = select_settings->as(); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index f0fe44e368f..7c2a3ffed78 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -216,12 +216,17 @@ public: limit_length = query_node.getLimit()->as().getValue().safeGet(); } + if (settings.limit) + limit_length = limit_length ? std::min(limit_length, settings.limit.value) : settings.limit; + if (query_node.hasOffset()) { /// Constness of offset is validated during query analysis stage limit_offset = query_node.getOffset()->as().getValue().safeGet(); } + limit_offset += settings.offset; + /// Partial sort can be done if there is LIMIT, but no DISTINCT, LIMIT WITH TIES, LIMIT BY, ARRAY JOIN if (limit_length != 0 && !query_node.isDistinct() && @@ -720,7 +725,7 @@ bool addPreliminaryLimitOptimizationStepIfNeeded(QueryPlan & query_plan, bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation; bool apply_prelimit = apply_limit && - query_node.hasLimit() && + query_analysis_result.limit_length && !query_node.isLimitWithTies() && !query_node.isGroupByWithTotals() && !query_analysis_result.query_has_with_totals_in_any_subquery_in_join_tree && @@ -767,7 +772,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, * Otherwise we can take several equal values from different streams * according to limit and skip some distinct values. */ - if (query_node.hasLimit() && query_node.isDistinct()) + if (query_analysis_result.limit_length && query_node.isDistinct()) { addDistinctStep(query_plan, query_analysis_result, @@ -785,7 +790,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, addLimitByStep(query_plan, limit_by_analysis_result, query_node); } - if (query_node.hasLimit()) + if (query_analysis_result.limit_length) addPreliminaryLimitStep(query_plan, query_analysis_result, planner_context, true /*do_not_skip_offset*/); } @@ -1420,7 +1425,7 @@ void Planner::buildPlanForQueryNode() bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; - if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_offset) + if (query_analysis_result.limit_length && query_node.isLimitWithTies() && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); addExtremesStepIfNeeded(query_plan, planner_context); @@ -1434,9 +1439,9 @@ void Planner::buildPlanForQueryNode() * This is the case for various optimizations for distributed queries, * and when LIMIT cannot be applied it will be applied on the initiator anyway. 
*/ - if (query_node.hasLimit() && apply_limit && !limit_applied && apply_offset) + if (query_analysis_result.limit_length && apply_limit && !limit_applied && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); - else if (!limit_applied && apply_offset && query_node.hasOffset()) + else if (!limit_applied && apply_offset && query_analysis_result.limit_length) addOffsetStep(query_plan, query_analysis_result); /// Project names is not done on shards, because initiator will not find columns in blocks diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 1a7050b4dff..d3a2ec470cf 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -95,6 +95,8 @@ ContextPtr getViewContext(ContextPtr context) view_settings.max_result_rows = 0; view_settings.max_result_bytes = 0; view_settings.extremes = false; + view_settings.limit = 0; + view_settings.offset = 0; view_context->setSettings(view_settings); return view_context; } diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings.reference b/tests/queries/0_stateless/02667_analyzer_limit_settings.reference new file mode 100644 index 00000000000..9e38ed9a59b --- /dev/null +++ b/tests/queries/0_stateless/02667_analyzer_limit_settings.reference @@ -0,0 +1,54 @@ +-- { echoOn } +SET limit = 0; +SELECT * FROM numbers(10); +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +2 +3 +4 +5 +6 +SELECT count(*) FROM (SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SET limit = 3; +SELECT * FROM numbers(10); +0 +1 +2 +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +2 +3 +4 +5 +6 +SELECT count(*) FROM (SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; +10 diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings.sql b/tests/queries/0_stateless/02667_analyzer_limit_settings.sql new file mode 100644 index 00000000000..35dd65ab33e --- /dev/null +++ b/tests/queries/0_stateless/02667_analyzer_limit_settings.sql @@ -0,0 +1,24 @@ +SET allow_experimental_analyzer = 1; + +-- { echoOn } +SET limit = 0; + +SELECT * FROM numbers(10); +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +SELECT count(*) FROM (SELECT * FROM numbers(10)); +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; + +SET limit = 3; +SELECT * FROM numbers(10); +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +SELECT count(*) FROM (SELECT * FROM numbers(10)); +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +SELECT count(*) FROM 
view(SELECT * FROM numbers(10)); +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; +-- { echoOff } From cf493d1dfb83e66af712986f6346d8812da7420f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 21 Feb 2023 22:36:02 +0000 Subject: [PATCH 20/83] replace settings limit and offset with corresponding expression nodes --- src/Analyzer/QueryTreeBuilder.cpp | 65 +++++++++++++++---- src/Common/SettingsChanges.cpp | 25 +++++++ src/Common/SettingsChanges.h | 7 ++ src/Planner/Planner.cpp | 5 -- .../02667_analyzer_limit_settings.reference | 16 +++++ .../02667_analyzer_limit_settings.sql | 6 ++ 6 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 57b1ae1c994..e9e084615ca 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -233,22 +233,43 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q auto select_settings = select_query_typed.settings(); SettingsChanges settings_changes; - if (is_subquery) + /// We are going to remove settings LIMIT and OFFSET and + /// further replace them with corresponding expression nodes + UInt64 limit = 0; + UInt64 offset = 0; + + /// remove global settings limit and offset + if (const Settings & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) { - if (const Settings & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) - { - Settings settings = updated_context->getSettings(); - settings.limit = 0; - settings.offset = 0; - updated_context->setSettings(settings); - } + Settings settings = updated_context->getSettings(); + limit = settings.limit; + offset = settings.offset; + settings.limit = 0; + settings.offset = 0; + updated_context->setSettings(settings); } if (select_settings) { auto & set_query = select_settings->as(); - updated_context->applySettingsChanges(set_query.changes); - settings_changes = set_query.changes; + + /// remove expression settings limit and offset + if (auto * limit_field = set_query.changes.tryGet("limit")) + { + limit = limit_field->safeGet(); + set_query.changes.removeSetting("limit"); + } + if (auto * offset_field = set_query.changes.tryGet("offset")) + { + offset = offset_field->safeGet(); + set_query.changes.removeSetting("offset"); + } + + if (!set_query.changes.empty()) + { + updated_context->applySettingsChanges(set_query.changes); + settings_changes = set_query.changes; + } } auto current_query_tree = std::make_shared(std::move(updated_context), std::move(settings_changes)); @@ -334,12 +355,32 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q if (select_limit_by) current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); + /// combine limit expression with limit setting auto select_limit = select_query_typed.limitLength(); - if (select_limit) + if (select_limit && limit) + { + auto function_node = std::make_shared("least"); + function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + function_node->getArguments().getNodes().push_back(std::make_shared(limit)); + current_query_tree->getLimit() = function_node; + } + else if (limit) + current_query_tree->getLimit() = std::make_shared(limit); + else if (select_limit) current_query_tree->getLimit() = buildExpression(select_limit, current_context); 
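+    /// E.g. "SELECT * FROM t LIMIT 10 SETTINGS limit = 5" results in LIMIT least(10, 5),
+    /// i.e. the smaller of the query-level LIMIT and the limit setting wins.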
+ /// combine offset expression with offset setting auto select_offset = select_query_typed.limitOffset(); - if (select_offset) + if (select_offset && offset) + { + auto function_node = std::make_shared("plus"); + function_node->getArguments().getNodes().push_back(buildExpression(select_offset, current_context)); + function_node->getArguments().getNodes().push_back(std::make_shared(offset)); + current_query_tree->getOffset() = function_node; + } + else if (offset) + current_query_tree->getOffset() = std::make_shared(offset); + else if (select_offset) current_query_tree->getOffset() = buildExpression(select_offset, current_context); return current_query_tree; diff --git a/src/Common/SettingsChanges.cpp b/src/Common/SettingsChanges.cpp index 9fb4f361e09..45490f86abc 100644 --- a/src/Common/SettingsChanges.cpp +++ b/src/Common/SettingsChanges.cpp @@ -46,4 +46,29 @@ Field * SettingsChanges::tryGet(std::string_view name) return &change->value; } +bool SettingsChanges::insertSetting(std::string_view name, const Field & value) +{ + if (std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; }) != end()) + return false; + emplace_back(name, value); + return true; +} + +void SettingsChanges::setSetting(std::string_view name, const Field & value) +{ + if (auto * v = tryGet(name)) + *v = value; + else + insertSetting(name, value); +} + +bool SettingsChanges::removeSetting(std::string_view name) +{ + auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; }); + if (it == end()) + return false; + erase(it); + return true; +} + } diff --git a/src/Common/SettingsChanges.h b/src/Common/SettingsChanges.h index 776dacb93e8..d16934dbc76 100644 --- a/src/Common/SettingsChanges.h +++ b/src/Common/SettingsChanges.h @@ -28,6 +28,13 @@ public: bool tryGet(std::string_view name, Field & out_value) const; const Field * tryGet(std::string_view name) const; Field * tryGet(std::string_view name); + + /// inserts element if doesn't exists and returns true, else just returns false + bool insertSetting(std::string_view name, const Field & value); + /// sets element to value, inserts if doesn't exist + void setSetting(std::string_view name, const Field & value); + /// if element exists - removes it and returns true, else returns false + bool removeSetting(std::string_view name); }; } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 7c2a3ffed78..307e8d73b29 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -216,17 +216,12 @@ public: limit_length = query_node.getLimit()->as().getValue().safeGet(); } - if (settings.limit) - limit_length = limit_length ? 
std::min(limit_length, settings.limit.value) : settings.limit; - if (query_node.hasOffset()) { /// Constness of offset is validated during query analysis stage limit_offset = query_node.getOffset()->as().getValue().safeGet(); } - limit_offset += settings.offset; - /// Partial sort can be done if there is LIMIT, but no DISTINCT, LIMIT WITH TIES, LIMIT BY, ARRAY JOIN if (limit_length != 0 && !query_node.isDistinct() && diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings.reference b/tests/queries/0_stateless/02667_analyzer_limit_settings.reference index 9e38ed9a59b..6f23097612e 100644 --- a/tests/queries/0_stateless/02667_analyzer_limit_settings.reference +++ b/tests/queries/0_stateless/02667_analyzer_limit_settings.reference @@ -52,3 +52,19 @@ SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); 5 SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; 10 +SET limit = 4; +SET offset = 1; +SELECT * FROM numbers(10); +1 +2 +3 +4 +SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; +3 +4 +5 +SELECT * FROM numbers(10) LIMIT 5 OFFSET 2; +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings.sql b/tests/queries/0_stateless/02667_analyzer_limit_settings.sql index 35dd65ab33e..7c02c2d0d20 100644 --- a/tests/queries/0_stateless/02667_analyzer_limit_settings.sql +++ b/tests/queries/0_stateless/02667_analyzer_limit_settings.sql @@ -21,4 +21,10 @@ SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; SELECT count(*) FROM view(SELECT * FROM numbers(10)); SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; + +SET limit = 4; +SET offset = 1; +SELECT * FROM numbers(10); +SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; +SELECT * FROM numbers(10) LIMIT 5 OFFSET 2; -- { echoOff } From ea244e539032bc0b7f0c1f1f9c081cd42b970bd4 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 Feb 2023 13:28:45 +0000 Subject: [PATCH 21/83] revert getViewContext --- src/Storages/StorageView.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index d3a2ec470cf..1a7050b4dff 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -95,8 +95,6 @@ ContextPtr getViewContext(ContextPtr context) view_settings.max_result_rows = 0; view_settings.max_result_bytes = 0; view_settings.extremes = false; - view_settings.limit = 0; - view_settings.offset = 0; view_context->setSettings(view_settings); return view_context; } From 620071bb42df86069aaa12aeca2c432f01fedfc3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 Feb 2023 13:33:40 +0000 Subject: [PATCH 22/83] fix --- src/Planner/Planner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 307e8d73b29..c64dfd20f62 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1436,7 +1436,7 @@ void Planner::buildPlanForQueryNode() */ if (query_analysis_result.limit_length && apply_limit && !limit_applied && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); - else if (!limit_applied && apply_offset && query_analysis_result.limit_length) + else if (!limit_applied && apply_offset && query_analysis_result.limit_offset) addOffsetStep(query_plan, query_analysis_result); /// Project names is not done on shards, because initiator will not find columns in blocks From e0931dbdbe80ff677a150a4a3d812ca73e1ee4fe Mon 
Sep 17 00:00:00 2001 From: avogar Date: Wed, 22 Feb 2023 16:40:53 +0000 Subject: [PATCH 23/83] Enable input_format_json_ignore_unknown_keys_in_named_tuple by default --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c73c56ca5d7..b5e3c3f1e88 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -809,7 +809,7 @@ class IColumn; M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \ M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \ M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \ - M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, false, "Ignore unknown keys in json object for named tuples", 0) \ + M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \ M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 48dcded1868..2d346467338 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -84,7 +84,8 @@ static std::map sett {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, From 50caa3d66c935dfa4cfe97ca018e20bf6a22f4c7 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 22 Feb 2023 16:41:49 +0000 Subject: [PATCH 24/83] Update docs --- docs/en/operations/settings/settings-formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3f81dc528f5..80308c2ad1d 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -504,7 +504,7 @@ Enabled by default. Ignore unknown keys in json object for named tuples. -Disabled by default. +Enabled by default. 
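For a quick illustration of the new default, a minimal sketch with `clickhouse-local` (the input and tuple layout mirror the `02532` test updated below):

```bash
# The unknown key "d" is now skipped instead of failing with NOT_FOUND_COLUMN_IN_BLOCK;
# keys missing from the object ("b", "c") are filled with defaults as before.
echo '{"t" : {"a" : 1, "d" : 2}}' | clickhouse-local --input-format=JSONEachRow \
    --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' \
    -q "SELECT * FROM table"
# (1,NULL,NULL)
```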
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} From 99095446afa6428f3cd62af5c63e2d3987927be9 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 Feb 2023 17:22:13 +0000 Subject: [PATCH 25/83] review suggestions --- src/Analyzer/QueryTreeBuilder.cpp | 14 ++++---- src/Common/SettingsChanges.cpp | 7 ++-- src/Common/SettingsChanges.h | 6 ++-- ...lyzer_limit_settings_distributed.reference | 14 ++++++++ ...67_analyzer_limit_settings_distributed.sql | 34 +++++++++++++++++++ 5 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.reference create mode 100644 tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.sql diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index e9e084615ca..c7b9f9aae08 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -238,8 +238,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q UInt64 limit = 0; UInt64 offset = 0; - /// remove global settings limit and offset - if (const Settings & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) + /// Remove global settings limit and offset + if (const auto & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) { Settings settings = updated_context->getSettings(); limit = settings.limit; @@ -253,7 +253,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q { auto & set_query = select_settings->as(); - /// remove expression settings limit and offset + /// Remove expression settings limit and offset if (auto * limit_field = set_query.changes.tryGet("limit")) { limit = limit_field->safeGet(); @@ -355,28 +355,28 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q if (select_limit_by) current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); - /// combine limit expression with limit setting + /// Combine limit expression with limit setting auto select_limit = select_query_typed.limitLength(); if (select_limit && limit) { auto function_node = std::make_shared("least"); function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); function_node->getArguments().getNodes().push_back(std::make_shared(limit)); - current_query_tree->getLimit() = function_node; + current_query_tree->getLimit() = std::move(function_node); } else if (limit) current_query_tree->getLimit() = std::make_shared(limit); else if (select_limit) current_query_tree->getLimit() = buildExpression(select_limit, current_context); - /// combine offset expression with offset setting + /// Combine offset expression with offset setting auto select_offset = select_query_typed.limitOffset(); if (select_offset && offset) { auto function_node = std::make_shared("plus"); function_node->getArguments().getNodes().push_back(buildExpression(select_offset, current_context)); function_node->getArguments().getNodes().push_back(std::make_shared(offset)); - current_query_tree->getOffset() = function_node; + current_query_tree->getOffset() = std::move(function_node); } else if (offset) current_query_tree->getOffset() = std::make_shared(offset); diff --git a/src/Common/SettingsChanges.cpp b/src/Common/SettingsChanges.cpp index 45490f86abc..7790c272606 100644 --- 
a/src/Common/SettingsChanges.cpp +++ b/src/Common/SettingsChanges.cpp @@ -48,7 +48,8 @@ Field * SettingsChanges::tryGet(std::string_view name) bool SettingsChanges::insertSetting(std::string_view name, const Field & value) { - if (std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; }) != end()) + auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; }); + if (it != end()) return false; emplace_back(name, value); return true; @@ -56,8 +57,8 @@ bool SettingsChanges::insertSetting(std::string_view name, const Field & value) void SettingsChanges::setSetting(std::string_view name, const Field & value) { - if (auto * v = tryGet(name)) - *v = value; + if (auto * setting_value = tryGet(name)) + *setting_value = value; else insertSetting(name, value); } diff --git a/src/Common/SettingsChanges.h b/src/Common/SettingsChanges.h index d16934dbc76..61f5a1c7eba 100644 --- a/src/Common/SettingsChanges.h +++ b/src/Common/SettingsChanges.h @@ -29,11 +29,11 @@ public: const Field * tryGet(std::string_view name) const; Field * tryGet(std::string_view name); - /// inserts element if doesn't exists and returns true, else just returns false + /// Inserts element if doesn't exists and returns true, otherwise just returns false bool insertSetting(std::string_view name, const Field & value); - /// sets element to value, inserts if doesn't exist + /// Sets element to value, inserts if doesn't exist void setSetting(std::string_view name, const Field & value); - /// if element exists - removes it and returns true, else returns false + /// If element exists - removes it and returns true, otherwise returns false bool removeSetting(std::string_view name); }; diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.reference b/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.reference new file mode 100644 index 00000000000..466e80931e5 --- /dev/null +++ b/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.reference @@ -0,0 +1,14 @@ +limit 0 +limit 1 +limit 2 +limit 3 +limit 4 +offset 5 +offset 6 +offset 7 +offset 8 +offset 9 +limit w/ GROUP BY 4 4 +limit w/ GROUP BY 4 3 +limit/offset w/ GROUP BY 4 2 +limit/offset w/ GROUP BY 4 1 diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.sql b/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.sql new file mode 100644 index 00000000000..1624344b5a9 --- /dev/null +++ b/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.sql @@ -0,0 +1,34 @@ +-- Tags: distributed + +SET allow_experimental_analyzer = 1; + +SELECT 'limit', * FROM remote('127.1', view(SELECT * FROM numbers(10))) SETTINGS limit=5; +SELECT 'offset', * FROM remote('127.1', view(SELECT * FROM numbers(10))) SETTINGS offset=5; + +SELECT + 'limit w/ GROUP BY', + count(), + number +FROM remote('127.{1,2}', view( + SELECT intDiv(number, 2) AS number + FROM numbers(10) +)) +GROUP BY number +ORDER BY + count() ASC, + number DESC +SETTINGS limit=2; + +SELECT + 'limit/offset w/ GROUP BY', + count(), + number +FROM remote('127.{1,2}', view( + SELECT intDiv(number, 2) AS number + FROM numbers(10) +)) +GROUP BY number +ORDER BY + count() ASC, + number DESC +SETTINGS limit=2, offset=2; From 418c83c1aa041a4dff9b83623014dfd4af3013f4 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 23 Feb 2023 13:40:09 +0000 Subject: [PATCH 26/83] Fix tests --- .../0_stateless/02532_json_missing_named_tuple_elements.sh | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh b/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh index 97847b08203..2c12248cafd 100755 --- a/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh +++ b/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh @@ -13,7 +13,7 @@ echo '{"t" : {}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple echo '{"t" : {"a" : 1, "b" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" --input_format_json_defaults_for_missing_elements_in_named_tuple=0 2>&1 | grep -F "INCORRECT_DATA" -c -echo '{"t" : {"a" : 1, "d" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "NOT_FOUND_COLUMN_IN_BLOCK" -c +echo '{"t" : {"a" : 1, "d" : 2}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=0 --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "NOT_FOUND_COLUMN_IN_BLOCK" -c -echo '{"t" : {"a" : 1, "b" : 2, "c" : 3, "d" : 4}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "INCORRECT_DATA" -c +echo '{"t" : {"a" : 1, "b" : 2, "c" : 3, "d" : 4}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=0 --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "INCORRECT_DATA" -c From 54622566df3447ea970569ef6b26a281af22d53f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 23 Feb 2023 16:14:10 +0000 Subject: [PATCH 27/83] Add setting to change parquet version --- docs/en/interfaces/formats.md | 3 ++- docs/en/operations/settings/settings-formats.md | 6 ++++++ src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Core/SettingsEnums.cpp | 8 ++++++++ src/Core/SettingsEnums.h | 2 ++ src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 15 ++++++++++++--- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 17 ++++++++++++++++- ...hema_inference_formats_with_schema.reference | 2 +- 10 files changed, 50 insertions(+), 6 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index e94c6377ae9..10a9e59176e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1971,7 +1971,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. 
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. -- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. +- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. +- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `v2_latest`. ## Arrow {#data-format-arrow} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3f81dc528f5..84128b1d122 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1099,6 +1099,12 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin Enabled by default. +### output_format_parquet_version {#output_format_parquet_version} + +The version of Parquet format used in output format. Supported versions: `v1_0`, `v2_4`, `v2_6` and `v2_latest`. + +Default value: `v2_latest`. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 376395e370f..48caa1740cc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -851,6 +851,7 @@ class IColumn; M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \ + M(ParquetVersion, output_format_parquet_version, "v2_latest", "Parquet format version for output format. Supported versions: v1_0, v2_4, v2_6 and v2_latest (default)", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 5684e4f3114..7621ab03f61 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.3", {{"output_format_parquet_version", "v1_0", "v2_latest", "Use latest Parquet format version for output format"}}}, {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index bce4f7c0000..751fbf65281 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -171,4 +171,12 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {{"mmap", LocalFSReadMethod::mmap}, {"pread", LocalFSReadMethod::pread}, {"read", LocalFSReadMethod::read}}) + + +IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, + {{"v1_0", FormatSettings::ParquetVersion::V1_0}, + {"v2_4", FormatSettings::ParquetVersion::V2_4}, + {"v2_6", FormatSettings::ParquetVersion::V2_6}, + {"v2_latest", FormatSettings::ParquetVersion::V2_LATEST}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 35eb8eb4b6c..139a04f3a5a 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -72,6 +72,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeIn DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, FormatSettings::DateTimeOutputFormat) +DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion) + enum class LogsLevel { none = 0, /// Disable diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 5bee13075fb..0480c92edcf 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -111,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; + format_settings.parquet.output_version = settings.output_format_parquet_version; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 9b657d37862..58823b483be 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -46,9 +46,9 @@ struct FormatSettings enum class DateTimeInputFormat { - Basic, /// Default 
format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. - BestEffort, /// Use sophisticated rules to parse whatever possible. - BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy + Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. + BestEffort, /// Use sophisticated rules to parse whatever possible. + BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy }; DateTimeInputFormat date_time_input_format = DateTimeInputFormat::Basic; @@ -176,6 +176,14 @@ struct FormatSettings String column_for_object_name; } json_object_each_row; + enum class ParquetVersion + { + V1_0, + V2_4, + V2_6, + V2_LATEST, + }; + struct { UInt64 row_group_size = 1000000; @@ -187,6 +195,7 @@ struct FormatSettings bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; UInt64 max_block_size = 8192; + ParquetVersion output_version; } parquet; struct Pretty diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 375d3878b10..18c81f8fd6a 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -16,6 +16,21 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } +static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) +{ + switch (settings.parquet.output_version) + { + case FormatSettings::ParquetVersion::V1_0: + return parquet::ParquetVersion::PARQUET_1_0; + case FormatSettings::ParquetVersion::V2_4: + return parquet::ParquetVersion::PARQUET_2_4; + case FormatSettings::ParquetVersion::V2_6: + return parquet::ParquetVersion::PARQUET_2_6; + case FormatSettings::ParquetVersion::V2_LATEST: + return parquet::ParquetVersion::PARQUET_2_LATEST; + } +} + ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { @@ -44,7 +59,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) auto sink = std::make_shared(out); parquet::WriterProperties::Builder builder; - builder.version(parquet::ParquetVersion::PARQUET_2_LATEST); + builder.version(getParquetVersion(format_settings)); #if USE_SNAPPY builder.compression(parquet::Compression::SNAPPY); #endif diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference index f3bfd9f1048..47391a77ee8 100644 --- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference +++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference @@ -72,7 +72,7 @@ uint8 Nullable(UInt8) int16 Nullable(Int16) uint16 Nullable(UInt16) int32 Nullable(Int32) -uint32 Nullable(Int64) +uint32 Nullable(UInt32) int64 Nullable(Int64) uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 From eec6051a50d18088f85f5b8e75b6c08d46144409 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 23 Feb 2023 16:16:08 +0000 Subject: [PATCH 28/83] style --- src/Formats/FormatSettings.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 58823b483be..44c710e46e2 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -46,9 
+46,9 @@ struct FormatSettings enum class DateTimeInputFormat { - Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. - BestEffort, /// Use sophisticated rules to parse whatever possible. - BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy + Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. + BestEffort, /// Use sophisticated rules to parse whatever possible. + BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy }; DateTimeInputFormat date_time_input_format = DateTimeInputFormat::Basic; From ef529de7db112ad5b327f11f2c3d210cb54d5689 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 Feb 2023 12:58:42 +0000 Subject: [PATCH 29/83] Cosmetics --- src/Common/OptimizedRegularExpression.h | 4 +- src/Common/StringSearcher.h | 166 ++++++++++++------------ 2 files changed, 86 insertions(+), 84 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index 0c6e32cb383..d8ed1e205c8 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -100,8 +100,8 @@ private: bool required_substring_is_prefix; bool is_case_insensitive; std::string required_substring; - std::optional> case_sensitive_substring_searcher; - std::optional> case_insensitive_substring_searcher; + std::optional case_sensitive_substring_searcher; + std::optional case_insensitive_substring_searcher; std::unique_ptr re2; unsigned number_of_subpatterns; diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index a80c8c73292..c595c649a27 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -32,25 +31,30 @@ namespace ErrorCodes * In most cases, performance is less than Volnitsky (see Volnitsky.h). 
*/ +namespace impl +{ class StringSearcherBase { public: bool force_fallback = false; + #ifdef __SSE2__ protected: - static constexpr auto n = sizeof(__m128i); - const Int64 page_size = ::getPageSize(); + static constexpr size_t N = sizeof(__m128i); - bool pageSafe(const void * const ptr) const + bool isPageSafe(const void * const ptr) const { - return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - n; + return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - N; } + +private: + const Int64 page_size = ::getPageSize(); #endif }; -/// Performs case-sensitive and case-insensitive search of UTF-8 strings +/// Performs case-sensitive or case-insensitive search of ASCII or UTF-8 strings template class StringSearcher; /// Case-insensitive UTF-8 searcher @@ -65,9 +69,9 @@ private: const size_t needle_size; const uint8_t * const needle_end = needle + needle_size; /// lower and uppercase variants of the first octet of the first character in `needle` - bool first_needle_symbol_is_ascii{}; - uint8_t l{}; - uint8_t u{}; + bool first_needle_symbol_is_ascii = false; + uint8_t l = 0; + uint8_t u = 0; #ifdef __SSE4_1__ /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol @@ -76,16 +80,17 @@ private: /// lower and uppercase vectors of first 16 characters of `needle` __m128i cachel = _mm_setzero_si128(); __m128i cacheu = _mm_setzero_si128(); - int cachemask{}; - size_t cache_valid_len{}; - size_t cache_actual_len{}; + int cachemask = 0; + size_t cache_valid_len = 0; + size_t cache_actual_len = 0; #endif public: template requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, const size_t needle_size_) - : needle{reinterpret_cast(needle_)}, needle_size{needle_size_} + StringSearcher(const CharT * needle_, size_t needle_size_) + : needle(reinterpret_cast(needle_)) + , needle_size(needle_size_) { if (0 == needle_size) return; @@ -140,7 +145,7 @@ public: const auto * needle_pos = needle; - for (size_t i = 0; i < n;) + for (size_t i = 0; i < N;) { if (needle_pos == needle_end) { @@ -171,18 +176,18 @@ public: } cache_actual_len += src_len; - if (cache_actual_len < n) + if (cache_actual_len < N) cache_valid_len += src_len; - for (size_t j = 0; j < src_len && i < n; ++j, ++i) + for (size_t j = 0; j < src_len && i < N; ++j, ++i) { cachel = _mm_srli_si128(cachel, 1); cacheu = _mm_srli_si128(cacheu, 1); if (needle_pos != needle_end) { - cachel = _mm_insert_epi8(cachel, l_seq[j], n - 1); - cacheu = _mm_insert_epi8(cacheu, u_seq[j], n - 1); + cachel = _mm_insert_epi8(cachel, l_seq[j], N - 1); + cacheu = _mm_insert_epi8(cacheu, u_seq[j], N - 1); cachemask |= 1 << i; ++needle_pos; @@ -225,7 +230,7 @@ public: { #ifdef __SSE4_1__ - if (pageSafe(pos) && !force_fallback) + if (isPageSafe(pos) && !force_fallback) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); @@ -275,7 +280,7 @@ public: while (haystack < haystack_end) { #ifdef __SSE4_1__ - if (haystack + n <= haystack_end && pageSafe(haystack) && !force_fallback) + if (haystack + N <= haystack_end && isPageSafe(haystack) && !force_fallback) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); @@ -286,7 +291,7 @@ public: if (mask == 0) { - haystack += n; + haystack += N; UTF8::syncForward(haystack, haystack_end); continue; } @@ -294,7 +299,7 @@ public: const auto offset = __builtin_ctz(mask); haystack += offset; - if (haystack + n <= haystack_end && 
pageSafe(haystack)) + if (haystack + N <= haystack_end && isPageSafe(haystack)) { const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); @@ -344,7 +349,7 @@ public: template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack, size_t haystack_size) const { return search(haystack, haystack + haystack_size); } @@ -360,22 +365,23 @@ private: const uint8_t * const needle; const uint8_t * const needle_end; /// lower and uppercase variants of the first character in `needle` - uint8_t l{}; - uint8_t u{}; + uint8_t l = 0; + uint8_t u = 0; #ifdef __SSE4_1__ /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol __m128i patl, patu; /// lower and uppercase vectors of first 16 characters of `needle` __m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128(); - int cachemask{}; + int cachemask = 0; #endif public: template requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, const size_t needle_size) - : needle{reinterpret_cast(needle_)}, needle_end{needle + needle_size} + StringSearcher(const CharT * needle_, size_t needle_size) + : needle(reinterpret_cast(needle_)) + , needle_end(needle + needle_size) { if (0 == needle_size) return; @@ -389,15 +395,15 @@ public: const auto * needle_pos = needle; - for (const auto i : collections::range(0, n)) + for (size_t i = 0; i < N; ++i) { cachel = _mm_srli_si128(cachel, 1); cacheu = _mm_srli_si128(cacheu, 1); if (needle_pos != needle_end) { - cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), n - 1); - cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), n - 1); + cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), N - 1); + cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), N - 1); cachemask |= 1 << i; ++needle_pos; } @@ -410,7 +416,7 @@ public: ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const { #ifdef __SSE4_1__ - if (pageSafe(pos)) + if (isPageSafe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); @@ -422,8 +428,8 @@ public: { if (mask == cachemask) { - pos += n; - const auto * needle_pos = needle + n; + pos += N; + const auto * needle_pos = needle + N; while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) { @@ -470,7 +476,7 @@ public: while (haystack < haystack_end) { #ifdef __SSE4_1__ - if (haystack + n <= haystack_end && pageSafe(haystack)) + if (haystack + N <= haystack_end && isPageSafe(haystack)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); @@ -481,14 +487,14 @@ public: if (mask == 0) { - haystack += n; + haystack += N; continue; } const auto offset = __builtin_ctz(mask); haystack += offset; - if (haystack + n <= haystack_end && pageSafe(haystack)) + if (haystack + N <= haystack_end && isPageSafe(haystack)) { const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); @@ -500,8 +506,8 @@ public: { if (mask_offset == cachemask) { - const auto * haystack_pos = haystack + n; - const auto * needle_pos = needle + n; + const auto * haystack_pos = haystack + N; + const auto * needle_pos = needle + N; while (haystack_pos < 
haystack_end && needle_pos < needle_end && std::tolower(*haystack_pos) == std::tolower(*needle_pos)) @@ -550,14 +556,14 @@ public: template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack, size_t haystack_size) const { return search(haystack, haystack + haystack_size); } }; -/// Case-sensitive searcher (both ASCII and UTF-8) +/// Case-sensitive ASCII and UTF8 searcher template class StringSearcher : public StringSearcherBase { @@ -571,7 +577,7 @@ private: #ifdef __SSE4_1__ /// second character of "needle" (if its length is > 1) uint8_t second_needle_character = 0; - /// The first/second needle character broadcasted into a 16 bytes vector + /// first/second needle character broadcasted into a 16 bytes vector __m128i first_needle_character_vec; __m128i second_needle_character_vec; /// vector of first 16 characters of `needle` @@ -582,10 +588,11 @@ private: public: template requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, const size_t needle_size) - : needle{reinterpret_cast(needle_)}, needle_end{needle + needle_size} + StringSearcher(const CharT * needle_, size_t needle_size) + : needle(reinterpret_cast(needle_)) + , needle_end(needle + needle_size) { - if (0 == needle_size) + if (needle_size == 0) return; first_needle_character = *needle; @@ -599,13 +606,13 @@ public: } const auto * needle_pos = needle; - for (uint8_t i = 0; i < n; ++i) + for (uint8_t i = 0; i < N; ++i) { cache = _mm_srli_si128(cache, 1); if (needle_pos != needle_end) { - cache = _mm_insert_epi8(cache, *needle_pos, n - 1); + cache = _mm_insert_epi8(cache, *needle_pos, N - 1); cachemask |= 1 << i; ++needle_pos; } @@ -618,7 +625,7 @@ public: ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const { #ifdef __SSE4_1__ - if (pageSafe(pos)) + if (isPageSafe(pos)) { const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(pos)); const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, cache); @@ -628,8 +635,8 @@ public: { if (comparison_result_mask == cachemask) { - pos += n; - const auto * needle_pos = needle + n; + pos += N; + const auto * needle_pos = needle + N; while (needle_pos < needle_end && *pos == *needle_pos) ++pos, ++needle_pos; @@ -675,18 +682,18 @@ public: { while (haystack < haystack_end) { - if (haystack + n <= haystack_end && pageSafe(haystack)) + if (haystack + N <= haystack_end && isPageSafe(haystack)) { const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, first_needle_character_vec); const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); if (comparison_result_mask == 0) { - haystack += n; + haystack += N; continue; } - const auto offset = std::countr_zero(comparison_result_mask); + const int offset = std::countr_zero(comparison_result_mask); haystack += offset; return haystack; @@ -711,7 +718,7 @@ public: /// Compare the [0:15] bytes from haystack and broadcasted 16 bytes vector from first character of needle. /// Compare the [1:16] bytes from haystack and broadcasted 16 bytes vector from second character of needle. /// Bit AND the results of above two comparisons and get the mask. 
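                /// Illustrative example (values are not from the original source): with needle "ab" and
                /// haystack bytes "xzabyz...", the first compare sets lane 2 (the byte 'a'), the second
                /// compare also sets lane 2 (the following byte 'b'), so the ANDed mask has bit 2 set
                /// and countr_zero(mask) == 2 lands exactly on the start of "ab".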
- if ((haystack + 1 + n) <= haystack_end && pageSafe(haystack + 1)) + if ((haystack + 1 + N) <= haystack_end && isPageSafe(haystack + 1)) { const __m128i haystack_characters_from_1st = _mm_loadu_si128(reinterpret_cast(haystack)); const __m128i haystack_characters_from_2nd = _mm_loadu_si128(reinterpret_cast(haystack + 1)); @@ -722,14 +729,14 @@ public: /// If the mask = 0, then first two characters [0:1] from needle are not in the [0:17] bytes of haystack. if (comparison_result_mask == 0) { - haystack += n; + haystack += N; continue; } - const auto offset = std::countr_zero(comparison_result_mask); + const int offset = std::countr_zero(comparison_result_mask); haystack += offset; - if (haystack + n <= haystack_end && pageSafe(haystack)) + if (haystack + N <= haystack_end && isPageSafe(haystack)) { /// Already find the haystack position where the [pos:pos + 1] two characters exactly match the first two characters of needle. /// Compare the 16 bytes from needle (cache) and the first 16 bytes from haystack at once if the haystack size >= 16 bytes. @@ -741,8 +748,8 @@ public: { if (mask_offset == cachemask) { - const auto * haystack_pos = haystack + n; - const auto * needle_pos = needle + n; + const auto * haystack_pos = haystack + N; + const auto * needle_pos = needle + N; while (haystack_pos < haystack_end && needle_pos < needle_end && *haystack_pos == *needle_pos) @@ -785,7 +792,7 @@ public: template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack, size_t haystack_size) const { return search(haystack, haystack + haystack_size); } @@ -804,14 +811,12 @@ class TokenSearcher : public StringSearcherBase public: template requires (sizeof(CharT) == 1) - TokenSearcher(const CharT * needle_, const size_t needle_size_) - : searcher{needle_, needle_size_}, - needle_size(needle_size_) + TokenSearcher(const CharT * needle_, size_t needle_size_) + : searcher(needle_, needle_size_) + , needle_size(needle_size_) { if (std::any_of(needle_, needle_ + needle_size_, isTokenSeparator)) - { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); - } } @@ -848,7 +853,7 @@ public: template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack, size_t haystack_size) const { return search(haystack, haystack + haystack_size); } @@ -867,26 +872,27 @@ public: } }; +} -using ASCIICaseSensitiveStringSearcher = StringSearcher; -using ASCIICaseInsensitiveStringSearcher = StringSearcher; -using UTF8CaseSensitiveStringSearcher = StringSearcher; -using UTF8CaseInsensitiveStringSearcher = StringSearcher; -using ASCIICaseSensitiveTokenSearcher = TokenSearcher; -using ASCIICaseInsensitiveTokenSearcher = TokenSearcher; +using ASCIICaseSensitiveStringSearcher = impl::StringSearcher; +using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher; +using UTF8CaseSensitiveStringSearcher = impl::StringSearcher; +using UTF8CaseInsensitiveStringSearcher = impl::StringSearcher; +using ASCIICaseSensitiveTokenSearcher = impl::TokenSearcher; +using ASCIICaseInsensitiveTokenSearcher = impl::TokenSearcher; /// Use only with short haystacks where cheap initialization is required. 
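 /// (Presumably the trade-off is: delegating to std::search needs no SSE cache setup, so construction
 /// is nearly free, at the cost of a slower per-byte scan on long haystacks.)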
template -struct StdLibASCIIStringSearcher : public StringSearcherBase +struct StdLibASCIIStringSearcher { const char * const needle_start; const char * const needle_end; template requires (sizeof(CharT) == 1) - StdLibASCIIStringSearcher(const CharT * const needle_start_, const size_t needle_size_) - : needle_start{reinterpret_cast(needle_start_)} - , needle_end{reinterpret_cast(needle_start) + needle_size_} + StdLibASCIIStringSearcher(const CharT * const needle_start_, size_t needle_size_) + : needle_start(reinterpret_cast(needle_start_)) + , needle_end(reinterpret_cast(needle_start) + needle_size_) {} template @@ -894,22 +900,18 @@ struct StdLibASCIIStringSearcher : public StringSearcherBase const CharT * search(const CharT * haystack_start, const CharT * const haystack_end) const { if constexpr (CaseInsensitive) - { return std::search( haystack_start, haystack_end, needle_start, needle_end, [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); - } else - { return std::search( haystack_start, haystack_end, needle_start, needle_end, [](char c1, char c2) {return c1 == c2;}); - } } template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack_start, const size_t haystack_length) const + const CharT * search(const CharT * haystack_start, size_t haystack_length) const { return search(haystack_start, haystack_start + haystack_length); } From 421afeeae074f70d094f9601aa655dc4bf6d9c7f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 Feb 2023 10:13:35 +0000 Subject: [PATCH 30/83] Shuffle class order (and just that) --- src/Common/StringSearcher.h | 886 ++++++++++++++++++------------------ 1 file changed, 444 insertions(+), 442 deletions(-) diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index c595c649a27..15a8b3a022b 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -57,6 +57,450 @@ private: /// Performs case-sensitive or case-insensitive search of ASCII or UTF-8 strings template class StringSearcher; + +/// Case-sensitive ASCII and UTF8 searcher +template +class StringSearcher : public StringSearcherBase +{ +private: + /// string to be searched for + const uint8_t * const needle; + const uint8_t * const needle_end; + /// first character in `needle` + uint8_t first_needle_character = 0; + +#ifdef __SSE4_1__ + /// second character of "needle" (if its length is > 1) + uint8_t second_needle_character = 0; + /// first/second needle character broadcasted into a 16 bytes vector + __m128i first_needle_character_vec; + __m128i second_needle_character_vec; + /// vector of first 16 characters of `needle` + __m128i cache = _mm_setzero_si128(); + uint16_t cachemask = 0; +#endif + +public: + template + requires (sizeof(CharT) == 1) + StringSearcher(const CharT * needle_, size_t needle_size) + : needle(reinterpret_cast(needle_)) + , needle_end(needle + needle_size) + { + if (needle_size == 0) + return; + + first_needle_character = *needle; + +#ifdef __SSE4_1__ + first_needle_character_vec = _mm_set1_epi8(first_needle_character); + if (needle_size > 1) + { + second_needle_character = *(needle + 1); + second_needle_character_vec = _mm_set1_epi8(second_needle_character); + } + const auto * needle_pos = needle; + + for (uint8_t i = 0; i < N; ++i) + { + cache = _mm_srli_si128(cache, 1); + + if (needle_pos != needle_end) + { + cache = _mm_insert_epi8(cache, *needle_pos, N - 1); + cachemask |= 1 << i; + ++needle_pos; + } + } +#endif + } + + template + requires (sizeof(CharT) == 1) + ALWAYS_INLINE bool compare(const 
CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const + { +#ifdef __SSE4_1__ + if (isPageSafe(pos)) + { + const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(pos)); + const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, cache); + const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); + + if (0xffff == cachemask) + { + if (comparison_result_mask == cachemask) + { + pos += N; + const auto * needle_pos = needle + N; + + while (needle_pos < needle_end && *pos == *needle_pos) + ++pos, ++needle_pos; + + if (needle_pos == needle_end) + return true; + } + } + else if ((comparison_result_mask & cachemask) == cachemask) + return true; + + return false; + } +#endif + + if (*pos == first_needle_character) + { + ++pos; + const auto * needle_pos = needle + 1; + + while (needle_pos < needle_end && *pos == *needle_pos) + ++pos, ++needle_pos; + + if (needle_pos == needle_end) + return true; + } + + return false; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, const CharT * const haystack_end) const + { + const auto needle_size = needle_end - needle; + + if (needle == needle_end) + return haystack; + +#ifdef __SSE4_1__ + /// Fast path for single-character needles. Compare 16 characters of the haystack against the needle character at once. + if (needle_size == 1) + { + while (haystack < haystack_end) + { + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); + const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, first_needle_character_vec); + const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); + if (comparison_result_mask == 0) + { + haystack += N; + continue; + } + + const int offset = std::countr_zero(comparison_result_mask); + haystack += offset; + + return haystack; + } + + if (haystack == haystack_end) + return haystack_end; + + if (*haystack == first_needle_character) + return haystack; + + ++haystack; + } + + return haystack_end; + } +#endif + + while (haystack < haystack_end && haystack_end - haystack >= needle_size) + { +#ifdef __SSE4_1__ + /// Compare the [0:15] bytes from haystack and broadcasted 16 bytes vector from first character of needle. + /// Compare the [1:16] bytes from haystack and broadcasted 16 bytes vector from second character of needle. + /// Bit AND the results of above two comparisons and get the mask. + if ((haystack + 1 + N) <= haystack_end && isPageSafe(haystack + 1)) + { + const __m128i haystack_characters_from_1st = _mm_loadu_si128(reinterpret_cast(haystack)); + const __m128i haystack_characters_from_2nd = _mm_loadu_si128(reinterpret_cast(haystack + 1)); + const __m128i comparison_result_1st = _mm_cmpeq_epi8(haystack_characters_from_1st, first_needle_character_vec); + const __m128i comparison_result_2nd = _mm_cmpeq_epi8(haystack_characters_from_2nd, second_needle_character_vec); + const __m128i comparison_result_combined = _mm_and_si128(comparison_result_1st, comparison_result_2nd); + const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result_combined); + /// If the mask = 0, then first two characters [0:1] from needle are not in the [0:17] bytes of haystack. 
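+                /// (Note on the fast path: a zero mask proves that no match can start in the first 16
+                /// positions (the second load already covers bytes [1:16], so even a pair starting at
+                /// position 15 was checked), hence it is safe to skip N bytes at once.)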
+ if (comparison_result_mask == 0) + { + haystack += N; + continue; + } + + const int offset = std::countr_zero(comparison_result_mask); + haystack += offset; + + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + /// Already find the haystack position where the [pos:pos + 1] two characters exactly match the first two characters of needle. + /// Compare the 16 bytes from needle (cache) and the first 16 bytes from haystack at once if the haystack size >= 16 bytes. + const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); + const __m128i comparison_result_cache = _mm_cmpeq_epi8(haystack_characters, cache); + const uint16_t mask_offset = _mm_movemask_epi8(comparison_result_cache); + + if (0xffff == cachemask) + { + if (mask_offset == cachemask) + { + const auto * haystack_pos = haystack + N; + const auto * needle_pos = needle + N; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + *haystack_pos == *needle_pos) + ++haystack_pos, ++needle_pos; + + if (needle_pos == needle_end) + return haystack; + } + } + else if ((mask_offset & cachemask) == cachemask) + return haystack; + + ++haystack; + continue; + } + } +#endif + + if (haystack == haystack_end) + return haystack_end; + + if (*haystack == first_needle_character) + { + const auto * haystack_pos = haystack + 1; + const auto * needle_pos = needle + 1; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + *haystack_pos == *needle_pos) + ++haystack_pos, ++needle_pos; + + if (needle_pos == needle_end) + return haystack; + } + + ++haystack; + } + + return haystack_end; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, size_t haystack_size) const + { + return search(haystack, haystack + haystack_size); + } +}; + + +/// Case-insensitive ASCII searcher +template <> +class StringSearcher : public StringSearcherBase +{ +private: + /// string to be searched for + const uint8_t * const needle; + const uint8_t * const needle_end; + /// lower and uppercase variants of the first character in `needle` + uint8_t l = 0; + uint8_t u = 0; + +#ifdef __SSE4_1__ + /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol + __m128i patl, patu; + /// lower and uppercase vectors of first 16 characters of `needle` + __m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128(); + int cachemask = 0; +#endif + +public: + template + requires (sizeof(CharT) == 1) + StringSearcher(const CharT * needle_, size_t needle_size) + : needle(reinterpret_cast(needle_)) + , needle_end(needle + needle_size) + { + if (0 == needle_size) + return; + + l = static_cast(std::tolower(*needle)); + u = static_cast(std::toupper(*needle)); + +#ifdef __SSE4_1__ + patl = _mm_set1_epi8(l); + patu = _mm_set1_epi8(u); + + const auto * needle_pos = needle; + + for (size_t i = 0; i < N; ++i) + { + cachel = _mm_srli_si128(cachel, 1); + cacheu = _mm_srli_si128(cacheu, 1); + + if (needle_pos != needle_end) + { + cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), N - 1); + cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), N - 1); + cachemask |= 1 << i; + ++needle_pos; + } + } +#endif + } + + template + requires (sizeof(CharT) == 1) + ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const + { +#ifdef __SSE4_1__ + if (isPageSafe(pos)) + { + const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); + const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); + const auto 
v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu); + const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); + const auto mask = _mm_movemask_epi8(v_against_l_or_u); + + if (0xffff == cachemask) + { + if (mask == cachemask) + { + pos += N; + const auto * needle_pos = needle + N; + + while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) + { + ++pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return true; + } + } + else if ((mask & cachemask) == cachemask) + return true; + + return false; + } +#endif + + if (*pos == l || *pos == u) + { + ++pos; + const auto * needle_pos = needle + 1; + + while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) + { + ++pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return true; + } + + return false; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, const CharT * const haystack_end) const + { + if (needle == needle_end) + return haystack; + + while (haystack < haystack_end) + { +#ifdef __SSE4_1__ + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); + const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); + const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu); + const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); + + const auto mask = _mm_movemask_epi8(v_against_l_or_u); + + if (mask == 0) + { + haystack += N; + continue; + } + + const auto offset = __builtin_ctz(mask); + haystack += offset; + + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); + const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); + const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu); + const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset); + const auto mask_offset = _mm_movemask_epi8(v_against_l_or_u_offset); + + if (0xffff == cachemask) + { + if (mask_offset == cachemask) + { + const auto * haystack_pos = haystack + N; + const auto * needle_pos = needle + N; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + std::tolower(*haystack_pos) == std::tolower(*needle_pos)) + { + ++haystack_pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return haystack; + } + } + else if ((mask_offset & cachemask) == cachemask) + return haystack; + + ++haystack; + continue; + } + } +#endif + + if (haystack == haystack_end) + return haystack_end; + + if (*haystack == l || *haystack == u) + { + const auto * haystack_pos = haystack + 1; + const auto * needle_pos = needle + 1; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + std::tolower(*haystack_pos) == std::tolower(*needle_pos)) + { + ++haystack_pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return haystack; + } + + ++haystack; + } + + return haystack_end; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, size_t haystack_size) const + { + return search(haystack, haystack + haystack_size); + } +}; + + /// Case-insensitive UTF-8 searcher template <> class StringSearcher : public StringSearcherBase @@ -356,448 +800,6 @@ public: }; -/// Case-insensitive ASCII searcher -template <> -class StringSearcher : public StringSearcherBase -{ -private: - /// string to be searched for - const uint8_t * const needle; - const uint8_t * const needle_end; - /// 
lower and uppercase variants of the first character in `needle` - uint8_t l = 0; - uint8_t u = 0; - -#ifdef __SSE4_1__ - /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol - __m128i patl, patu; - /// lower and uppercase vectors of first 16 characters of `needle` - __m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128(); - int cachemask = 0; -#endif - -public: - template - requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, size_t needle_size) - : needle(reinterpret_cast(needle_)) - , needle_end(needle + needle_size) - { - if (0 == needle_size) - return; - - l = static_cast(std::tolower(*needle)); - u = static_cast(std::toupper(*needle)); - -#ifdef __SSE4_1__ - patl = _mm_set1_epi8(l); - patu = _mm_set1_epi8(u); - - const auto * needle_pos = needle; - - for (size_t i = 0; i < N; ++i) - { - cachel = _mm_srli_si128(cachel, 1); - cacheu = _mm_srli_si128(cacheu, 1); - - if (needle_pos != needle_end) - { - cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), N - 1); - cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), N - 1); - cachemask |= 1 << i; - ++needle_pos; - } - } -#endif - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const - { -#ifdef __SSE4_1__ - if (isPageSafe(pos)) - { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); - const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); - const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu); - const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); - const auto mask = _mm_movemask_epi8(v_against_l_or_u); - - if (0xffff == cachemask) - { - if (mask == cachemask) - { - pos += N; - const auto * needle_pos = needle + N; - - while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) - { - ++pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return true; - } - } - else if ((mask & cachemask) == cachemask) - return true; - - return false; - } -#endif - - if (*pos == l || *pos == u) - { - ++pos; - const auto * needle_pos = needle + 1; - - while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) - { - ++pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return true; - } - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - if (needle == needle_end) - return haystack; - - while (haystack < haystack_end) - { -#ifdef __SSE4_1__ - if (haystack + N <= haystack_end && isPageSafe(haystack)) - { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); - const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); - const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu); - const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); - - const auto mask = _mm_movemask_epi8(v_against_l_or_u); - - if (mask == 0) - { - haystack += N; - continue; - } - - const auto offset = __builtin_ctz(mask); - haystack += offset; - - if (haystack + N <= haystack_end && isPageSafe(haystack)) - { - const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); - const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); - const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu); - const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset); - const auto mask_offset = 
_mm_movemask_epi8(v_against_l_or_u_offset); - - if (0xffff == cachemask) - { - if (mask_offset == cachemask) - { - const auto * haystack_pos = haystack + N; - const auto * needle_pos = needle + N; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - std::tolower(*haystack_pos) == std::tolower(*needle_pos)) - { - ++haystack_pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return haystack; - } - } - else if ((mask_offset & cachemask) == cachemask) - return haystack; - - ++haystack; - continue; - } - } -#endif - - if (haystack == haystack_end) - return haystack_end; - - if (*haystack == l || *haystack == u) - { - const auto * haystack_pos = haystack + 1; - const auto * needle_pos = needle + 1; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - std::tolower(*haystack_pos) == std::tolower(*needle_pos)) - { - ++haystack_pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return haystack; - } - - ++haystack; - } - - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } -}; - - -/// Case-sensitive ASCII and UTF8 searcher -template -class StringSearcher : public StringSearcherBase -{ -private: - /// string to be searched for - const uint8_t * const needle; - const uint8_t * const needle_end; - /// first character in `needle` - uint8_t first_needle_character = 0; - -#ifdef __SSE4_1__ - /// second character of "needle" (if its length is > 1) - uint8_t second_needle_character = 0; - /// first/second needle character broadcasted into a 16 bytes vector - __m128i first_needle_character_vec; - __m128i second_needle_character_vec; - /// vector of first 16 characters of `needle` - __m128i cache = _mm_setzero_si128(); - uint16_t cachemask = 0; -#endif - -public: - template - requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, size_t needle_size) - : needle(reinterpret_cast(needle_)) - , needle_end(needle + needle_size) - { - if (needle_size == 0) - return; - - first_needle_character = *needle; - -#ifdef __SSE4_1__ - first_needle_character_vec = _mm_set1_epi8(first_needle_character); - if (needle_size > 1) - { - second_needle_character = *(needle + 1); - second_needle_character_vec = _mm_set1_epi8(second_needle_character); - } - const auto * needle_pos = needle; - - for (uint8_t i = 0; i < N; ++i) - { - cache = _mm_srli_si128(cache, 1); - - if (needle_pos != needle_end) - { - cache = _mm_insert_epi8(cache, *needle_pos, N - 1); - cachemask |= 1 << i; - ++needle_pos; - } - } -#endif - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const - { -#ifdef __SSE4_1__ - if (isPageSafe(pos)) - { - const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(pos)); - const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, cache); - const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); - - if (0xffff == cachemask) - { - if (comparison_result_mask == cachemask) - { - pos += N; - const auto * needle_pos = needle + N; - - while (needle_pos < needle_end && *pos == *needle_pos) - ++pos, ++needle_pos; - - if (needle_pos == needle_end) - return true; - } - } - else if ((comparison_result_mask & cachemask) == cachemask) - return true; - - return false; - } -#endif - - if (*pos == first_needle_character) - { - ++pos; - const auto * needle_pos = needle + 1; 
- - while (needle_pos < needle_end && *pos == *needle_pos) - ++pos, ++needle_pos; - - if (needle_pos == needle_end) - return true; - } - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - const auto needle_size = needle_end - needle; - - if (needle == needle_end) - return haystack; - -#ifdef __SSE4_1__ - /// Fast path for single-character needles. Compare 16 characters of the haystack against the needle character at once. - if (needle_size == 1) - { - while (haystack < haystack_end) - { - if (haystack + N <= haystack_end && isPageSafe(haystack)) - { - const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); - const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, first_needle_character_vec); - const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); - if (comparison_result_mask == 0) - { - haystack += N; - continue; - } - - const int offset = std::countr_zero(comparison_result_mask); - haystack += offset; - - return haystack; - } - - if (haystack == haystack_end) - return haystack_end; - - if (*haystack == first_needle_character) - return haystack; - - ++haystack; - } - - return haystack_end; - } -#endif - - while (haystack < haystack_end && haystack_end - haystack >= needle_size) - { -#ifdef __SSE4_1__ - /// Compare the [0:15] bytes from haystack and broadcasted 16 bytes vector from first character of needle. - /// Compare the [1:16] bytes from haystack and broadcasted 16 bytes vector from second character of needle. - /// Bit AND the results of above two comparisons and get the mask. - if ((haystack + 1 + N) <= haystack_end && isPageSafe(haystack + 1)) - { - const __m128i haystack_characters_from_1st = _mm_loadu_si128(reinterpret_cast(haystack)); - const __m128i haystack_characters_from_2nd = _mm_loadu_si128(reinterpret_cast(haystack + 1)); - const __m128i comparison_result_1st = _mm_cmpeq_epi8(haystack_characters_from_1st, first_needle_character_vec); - const __m128i comparison_result_2nd = _mm_cmpeq_epi8(haystack_characters_from_2nd, second_needle_character_vec); - const __m128i comparison_result_combined = _mm_and_si128(comparison_result_1st, comparison_result_2nd); - const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result_combined); - /// If the mask = 0, then first two characters [0:1] from needle are not in the [0:17] bytes of haystack. - if (comparison_result_mask == 0) - { - haystack += N; - continue; - } - - const int offset = std::countr_zero(comparison_result_mask); - haystack += offset; - - if (haystack + N <= haystack_end && isPageSafe(haystack)) - { - /// Already find the haystack position where the [pos:pos + 1] two characters exactly match the first two characters of needle. - /// Compare the 16 bytes from needle (cache) and the first 16 bytes from haystack at once if the haystack size >= 16 bytes. 
- const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); - const __m128i comparison_result_cache = _mm_cmpeq_epi8(haystack_characters, cache); - const uint16_t mask_offset = _mm_movemask_epi8(comparison_result_cache); - - if (0xffff == cachemask) - { - if (mask_offset == cachemask) - { - const auto * haystack_pos = haystack + N; - const auto * needle_pos = needle + N; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - *haystack_pos == *needle_pos) - ++haystack_pos, ++needle_pos; - - if (needle_pos == needle_end) - return haystack; - } - } - else if ((mask_offset & cachemask) == cachemask) - return haystack; - - ++haystack; - continue; - } - } -#endif - - if (haystack == haystack_end) - return haystack_end; - - if (*haystack == first_needle_character) - { - const auto * haystack_pos = haystack + 1; - const auto * needle_pos = needle + 1; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - *haystack_pos == *needle_pos) - ++haystack_pos, ++needle_pos; - - if (needle_pos == needle_end) - return haystack; - } - - ++haystack; - } - - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } -}; - // Searches for needle surrounded by token-separators. // Separators are anything inside ASCII (0-128) and not alphanum. // Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings From 627961d9db017dac6cdc166af39fd982ca3b1a16 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 Feb 2023 10:14:58 +0000 Subject: [PATCH 31/83] More cosmetics --- src/Common/StringSearcher.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 15a8b3a022b..ae440f9151b 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -321,7 +321,7 @@ public: : needle(reinterpret_cast(needle_)) , needle_end(needle + needle_size) { - if (0 == needle_size) + if (needle_size == 0) return; l = static_cast(std::tolower(*needle)); @@ -536,7 +536,7 @@ public: : needle(reinterpret_cast(needle_)) , needle_size(needle_size_) { - if (0 == needle_size) + if (needle_size == 0) return; UTF8SequenceBuffer l_seq; @@ -718,7 +718,7 @@ public: requires (sizeof(CharT) == 1) const CharT * search(const CharT * haystack, const CharT * const haystack_end) const { - if (0 == needle_size) + if (needle_size == 0) return haystack; while (haystack < haystack_end) From e8527e720b2ab12b3327f1e3886aace402a292c6 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 24 Feb 2023 13:07:40 +0100 Subject: [PATCH 32/83] refine regexp tree dictionary --- src/Dictionaries/RegExpTreeDictionary.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index c636f200324..5775bb3790d 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -99,6 +99,17 @@ struct RegExpTreeDictionary::RegexTreeNode return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0); } + /// check if this node can cover all the attributes from the query. 
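+    /// For example (attribute names are hypothetical): if a query requests {"os", "browser"} and this
+    /// node's pattern assigns values to both, the matching loop below can `break` without visiting
+    /// further regexps.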
+ bool containsAll(const std::unordered_map & matching_attributes) const + { + for (const auto & [key, value] : matching_attributes) + { + if (!attributes.contains(key)) + return false; + } + return true; + } + struct AttributeValue { Field field; @@ -498,6 +509,8 @@ std::unordered_map RegExpTreeDictionary::match( if (node_ptr->match(reinterpret_cast(keys_data.data()) + offset, length)) { match_result.insertNodeID(node_ptr->id); + if (node_ptr->containsAll(attributes)) + break; } } From 944c24061512fd3b6fadc632829bb50d155bd6a7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 24 Feb 2023 14:09:58 +0100 Subject: [PATCH 33/83] Update history --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 2d346467338..c9d68b94a5e 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,12 +80,12 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.3", {{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, From f64c5fb3fac2f9e0dc99bc24f06686b600089365 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 24 Feb 2023 14:19:36 +0000 Subject: [PATCH 34/83] revert Planner.cpp --- src/Planner/Planner.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index c64dfd20f62..f0fe44e368f 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -720,7 +720,7 @@ bool addPreliminaryLimitOptimizationStepIfNeeded(QueryPlan & query_plan, bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation; bool apply_prelimit = apply_limit && - query_analysis_result.limit_length && + query_node.hasLimit() && !query_node.isLimitWithTies() && !query_node.isGroupByWithTotals() && !query_analysis_result.query_has_with_totals_in_any_subquery_in_join_tree && @@ -767,7 +767,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & 
query_plan, * Otherwise we can take several equal values from different streams * according to limit and skip some distinct values. */ - if (query_analysis_result.limit_length && query_node.isDistinct()) + if (query_node.hasLimit() && query_node.isDistinct()) { addDistinctStep(query_plan, query_analysis_result, @@ -785,7 +785,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, addLimitByStep(query_plan, limit_by_analysis_result, query_node); } - if (query_analysis_result.limit_length) + if (query_node.hasLimit()) addPreliminaryLimitStep(query_plan, query_analysis_result, planner_context, true /*do_not_skip_offset*/); } @@ -1420,7 +1420,7 @@ void Planner::buildPlanForQueryNode() bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; - if (query_analysis_result.limit_length && query_node.isLimitWithTies() && apply_offset) + if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); addExtremesStepIfNeeded(query_plan, planner_context); @@ -1434,9 +1434,9 @@ void Planner::buildPlanForQueryNode() * This is the case for various optimizations for distributed queries, * and when LIMIT cannot be applied it will be applied on the initiator anyway. */ - if (query_analysis_result.limit_length && apply_limit && !limit_applied && apply_offset) + if (query_node.hasLimit() && apply_limit && !limit_applied && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); - else if (!limit_applied && apply_offset && query_analysis_result.limit_offset) + else if (!limit_applied && apply_offset && query_node.hasOffset()) addOffsetStep(query_plan, query_analysis_result); /// Project names is not done on shards, because initiator will not find columns in blocks From 8c6cf28c01ea2428bad31de0efd7e26c0b99e767 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 Feb 2023 16:43:28 +0000 Subject: [PATCH 35/83] Make better --- docker/test/stress/run.sh | 309 +------------------ docker/test/upgrade/run.sh | 602 +++++++++---------------------------- tests/ci/stress_tests.lib | 305 +++++++++++++++++++ 3 files changed, 463 insertions(+), 753 deletions(-) create mode 100644 tests/ci/stress_tests.lib diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 86c0bd15979..15f58d6c3a3 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -8,230 +8,13 @@ dmesg --clear set -x -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' +# we mount tests folder from repo to /usr/share +ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress +ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test -OK="\tOK\t\\N\t" -FAIL="\tFAIL\t\\N\t" - -FAILURE_CONTEXT_LINES=50 -FAILURE_CONTEXT_MAX_LINE_WIDTH=400 - -function escaped() -{ - # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. 
- # Also limit lines width just in case (too long lines are not really useful usually) - clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) - from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" -} -function head_escaped() -{ - head -n $FAILURE_CONTEXT_LINES $1 | escaped -} -function unts() -{ - grep -Po "[0-9][0-9]:[0-9][0-9] \K.*" -} -function trim_server_logs() -{ - head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped -} - -function install_packages() -{ - dpkg -i $1/clickhouse-common-static_*.deb - dpkg -i $1/clickhouse-common-static-dbg_*.deb - dpkg -i $1/clickhouse-server_*.deb - dpkg -i $1/clickhouse-client_*.deb -} - -function configure() -{ - # install test configs - export USE_DATABASE_ORDINARY=1 - export EXPORT_S3_STORAGE_POLICIES=1 - /usr/share/clickhouse-test/config/install.sh - - # we mount tests folder from repo to /usr/share - ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress - ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test - ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages - ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag - - # avoid too slow startup - sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ - | sed "s|100000|10000|" \ - > /etc/clickhouse-server/config.d/keeper_port.xml.tmp - sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml - sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml - sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml - - # for clickhouse-server (via service) - echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment - # for clickhouse-client - export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' - - # since we run clickhouse from root - sudo chown root: /var/lib/clickhouse - - # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM). - echo "1" \ - > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml - - local total_mem - total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB - total_mem=$(( total_mem*1024 )) # bytes - - # Set maximum memory usage as half of total memory (less chance of OOM). - # - # But not via max_server_memory_usage but via max_memory_usage_for_user, - # so that we can override this setting and execute service queries, like: - # - hung check - # - show/drop database - # - ... 
- # - # So max_memory_usage_for_user will be a soft limit, and - # max_server_memory_usage will be hard limit, and queries that should be - # executed regardless memory limits will use max_memory_usage_for_user=0, - # instead of relying on max_untracked_memory - - max_server_memory_usage_to_ram_ratio=0.5 - echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}" - cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < - ${max_server_memory_usage_to_ram_ratio} - -EOL - - local max_users_mem - max_users_mem=$((total_mem*30/100)) # 30% - echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G" - cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < - - - 10G - ${max_users_mem} - - - -EOL - - cat > /etc/clickhouse-server/config.d/core.xml < - - - 107374182400 - - - $PWD - -EOL - - # Let OOM killer terminate other processes before clickhouse-server: - cat > /etc/clickhouse-server/config.d/oom_score.xml < - -1000 - -EOL - - # Analyzer is not yet ready for testing - cat > /etc/clickhouse-server/users.d/no_analyzer.xml < - - - - - - - - - - -EOL - -} - -function stop() -{ - local max_tries="${1:-90}" - local pid - # Preserve the pid, since the server can hung after the PID will be deleted. - pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - - clickhouse stop --max-tries "$max_tries" --do-not-kill && return - - # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. - echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv - kill -TERM "$(pidof gdb)" ||: - sleep 5 - echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log - timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log - clickhouse stop --force -} - -function start() -{ - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt ${1:-120} ] - then - echo "Cannot start clickhouse-server" - rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: - echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv - cat /var/log/clickhouse-server/stdout.log - tail -n100 /var/log/clickhouse-server/stderr.log - tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n100 - break - fi - # use root to match with current uid - clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log - sleep 0.5 - counter=$((counter + 1)) - done - - # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog - # and clickhouse-server can do fork-exec, for example, to run some bridge. - # Do not set nostop noprint for all signals, because some it may cause gdb to hang, - # explicitly ignore non-fatal signals that are used by server. - # Number of SIGRTMIN can be determined only in runtime. 
- RTMIN=$(kill -l SIGRTMIN) - echo " -set follow-fork-mode parent -handle SIGHUP nostop noprint pass -handle SIGINT nostop noprint pass -handle SIGQUIT nostop noprint pass -handle SIGPIPE nostop noprint pass -handle SIGTERM nostop noprint pass -handle SIGUSR1 nostop noprint pass -handle SIGUSR2 nostop noprint pass -handle SIG$RTMIN nostop noprint pass -info signals -continue -backtrace full -thread apply all backtrace full -info registers -disassemble /s -up -disassemble /s -up -disassemble /s -p \"done\" -detach -quit -" > script.gdb - - # FIXME Hung check may work incorrectly because of attached gdb - # 1. False positives are possible - # 2. We cannot attach another gdb to get stacktraces if some queries hung - gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & - sleep 5 - # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) - time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: -} +# Stress tests and upgrade check uses similar code that was placed +# in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/stress_tests.lib install_packages package_folder @@ -414,13 +197,7 @@ unset "${!THREAD_@}" start -clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ - || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \ - && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \ - >> /test_output/test_results.tsv) - -# Remove file application_errors.txt if it's empty -[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt +check_server_start stop @@ -430,71 +207,11 @@ stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log # Grep logs for sanitizer asserts, crashes and other critical errors +check_logs_for_critical_errors -# Sanitizer asserts -rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp -rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp -rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ - || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv -rm -f /test_output/tmp +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: -# OOM -rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ - || echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - -# Logical errors -rg -Fa "Code: 49. 
DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \ - && echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \ - || echo -e "No logical errors$OK" >> /test_output/test_results.tsv - -# Remove file logical_errors.txt if it's empty -[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt - -# No such key errors -rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ - && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \ - || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv - -# Remove file no_such_key_errors.txt if it's empty -[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt - -# Crash -rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ - || echo -e "Not crashed$OK" >> /test_output/test_results.tsv - -# It also checks for crash without stacktrace (printed by watchdog) -rg -Fa " " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \ - && echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \ - || echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - -# Remove file fatal_messages.txt if it's empty -[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - -rg -Fa "########################################" /test_output/* > /dev/null \ - && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv - -function get_gdb_log_context() -{ - rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped -} - -rg -Fa " received signal " /test_output/gdb.log > /dev/null \ - && echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv - -for table in query_log trace_log -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: -done - -dmesg -T > /test_output/dmesg.log - -# OOM in dmesg -- those are real -grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \ - && echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \ - || echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv +collect_query_and_trace_logs mv /var/log/clickhouse-server/stderr.log /test_output/ @@ -514,8 +231,4 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv -# Core dumps -find . 
-type f -maxdepth 1 -name 'core.*' | while read core; do - zstd --threads=0 $core - mv $core.zst /test_output/ -done +collect_core_dumps diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index e2392dd4438..db1f5ae995f 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -3,267 +3,22 @@ # shellcheck disable=SC2086 # shellcheck disable=SC2024 -# This script is similar to script for common stress test - # Avoid overlaps with previous runs dmesg --clear set -x -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' - -OK="\tOK\t\\N\t" -FAIL="\tFAIL\t\\N\t" - -FAILURE_CONTEXT_LINES=50 -FAILURE_CONTEXT_MAX_LINE_WIDTH=400 - -function escaped() -{ - # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. - # Also limit lines width just in case (too long lines are not really useful usually) - clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) - from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" -} -function head_escaped() -{ - head -n $FAILURE_CONTEXT_LINES $1 | escaped -} -function unts() -{ - grep -Po "[0-9][0-9]:[0-9][0-9] \K.*" -} -function trim_server_logs() -{ - head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped -} - -function install_packages() -{ - dpkg -i $1/clickhouse-common-static_*.deb - dpkg -i $1/clickhouse-common-static-dbg_*.deb - dpkg -i $1/clickhouse-server_*.deb - dpkg -i $1/clickhouse-client_*.deb -} - -function configure() -{ - # install test configs - export USE_DATABASE_ORDINARY=1 - export EXPORT_S3_STORAGE_POLICIES=1 - /usr/share/clickhouse-test/config/install.sh - - # avoid too slow startup - sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ - | sed "s|100000|10000|" \ - > /etc/clickhouse-server/config.d/keeper_port.xml.tmp - sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml - sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml - sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml - - # for clickhouse-server (via service) - echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment - # for clickhouse-client - export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' - - # since we run clickhouse from root - sudo chown root: /var/lib/clickhouse - - # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM). - echo "1" \ - > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml - - - local total_mem - total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB - total_mem=$(( total_mem*1024 )) # bytes - - # Set maximum memory usage as half of total memory (less chance of OOM). - # - # But not via max_server_memory_usage but via max_memory_usage_for_user, - # so that we can override this setting and execute service queries, like: - # - hung check - # - show/drop database - # - ... 
- # - # So max_memory_usage_for_user will be a soft limit, and - # max_server_memory_usage will be hard limit, and queries that should be - # executed regardless memory limits will use max_memory_usage_for_user=0, - # instead of relying on max_untracked_memory - - max_server_memory_usage_to_ram_ratio=0.5 - echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}" - cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < - ${max_server_memory_usage_to_ram_ratio} - -EOL - - local max_users_mem - max_users_mem=$((total_mem*30/100)) # 30% - echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G" - cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < - - - 10G - ${max_users_mem} - - - -EOL - - cat > /etc/clickhouse-server/config.d/core.xml < - - - 107374182400 - - - $PWD - -EOL - - # Let OOM killer terminate other processes before clickhouse-server: - cat > /etc/clickhouse-server/config.d/oom_score.xml < - -1000 - -EOL - - # Analyzer is not yet ready for testing - cat > /etc/clickhouse-server/users.d/no_analyzer.xml < - - - - - - - - - - -EOL - -} - -function stop() -{ - local max_tries="${1:-90}" - local pid - # Preserve the pid, since the server can hung after the PID will be deleted. - pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - - clickhouse stop --max-tries "$max_tries" --do-not-kill && return - - # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. - echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv - kill -TERM "$(pidof gdb)" ||: - sleep 5 - echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log - timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log - clickhouse stop --force -} - -function start() -{ - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt ${1:-120} ] - then - echo "Cannot start clickhouse-server" - rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: - echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv - cat /var/log/clickhouse-server/stdout.log - tail -n100 /var/log/clickhouse-server/stderr.log - tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n100 - break - fi - # use root to match with current uid - clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log - sleep 0.5 - counter=$((counter + 1)) - done - - # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog - # and clickhouse-server can do fork-exec, for example, to run some bridge. - # Do not set nostop noprint for all signals, because some it may cause gdb to hang, - # explicitly ignore non-fatal signals that are used by server. - # Number of SIGRTMIN can be determined only in runtime. 
- RTMIN=$(kill -l SIGRTMIN) - echo " -set follow-fork-mode parent -handle SIGHUP nostop noprint pass -handle SIGINT nostop noprint pass -handle SIGQUIT nostop noprint pass -handle SIGPIPE nostop noprint pass -handle SIGTERM nostop noprint pass -handle SIGUSR1 nostop noprint pass -handle SIGUSR2 nostop noprint pass -handle SIG$RTMIN nostop noprint pass -info signals -continue -backtrace full -thread apply all backtrace full -info registers -disassemble /s -up -disassemble /s -up -disassemble /s -p \"done\" -detach -quit -" > script.gdb - - # FIXME Hung check may work incorrectly because of attached gdb - # 1. False positives are possible - # 2. We cannot attach another gdb to get stacktraces if some queries hung - gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & - sleep 5 - # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) - time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: -} - -# Thread Fuzzer allows to check more permutations of possible thread scheduling -# and find more potential issues. -# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 -is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") -if [ "$is_tsan_build" -eq "0" ]; then - export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 - export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 -fi - -azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & -./setup_minio.sh stateless # to have a proper environment - # we mount tests folder from repo to /usr/share ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag +source /usr/share/clickhouse-test/ci/stress_tests.lib + +azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & +./setup_minio.sh stateless # to have a proper environment + echo "Get previous release tag" previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag) echo $previous_release_tag @@ -274,224 +29,161 @@ git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --br echo "Download 
clickhouse-server from the previous release" mkdir previous_release_package_folder -echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv +echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \ + || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv # Check if we cloned previous release repository successfully if ! [ "$(ls -A previous_release_repository/tests/queries)" ] then - echo -e "Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv + echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv + exit elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] then - echo -e "Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv -else - echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv - echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv - - # Make upgrade check more funny by forcing Ordinary engine for system database - mkdir /var/lib/clickhouse/metadata - echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql - - # Install previous release packages - install_packages previous_release_package_folder - - # Start server from previous release - # Let's enable S3 storage by default - export USE_S3_STORAGE_FOR_MERGE_TREE=1 - # Previous version may not be ready for fault injections - export ZOOKEEPER_FAULT_INJECTION=0 - configure - - # But we still need default disk because some tables loaded only into it - sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
-    > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
-    mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-    sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-    sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-
-    # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
-    rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
-    rm -f /etc/clickhouse-server/users.d/marks.xml ||:
-
-    # Remove s3 related configs to avoid "there is no disk type `cache`"
-    rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
-    rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
-
-    # Turn on after 22.12
-    rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
-    # it uses recently introduced settings which previous versions may not have
-    rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
-
-    start
-
-    clickhouse-client --query="SELECT 'Server version: ', version()"
-
-    mkdir tmp_stress_output
-
-    stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \
-        && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
-        || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
-
-    rm -rf tmp_stress_output
-
-    # We experienced deadlocks in this command in very rare cases. Let's debug it:
-    timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
-    (
-        echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
-        timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
-        clickhouse stop --force
-    )
-
-    # Use bigger timeout for previous version
-    stop 300
-    mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
-
-    # Install and start new server
-    install_packages package_folder
-    # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
-    export ZOOKEEPER_FAULT_INJECTION=0
-    configure
-    start 500
-    clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
-        || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
-        && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
-        >> /test_output/test_results.tsv)
-
-    # Remove file application_errors.txt if it's empty
-    [ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt
-
-    clickhouse-client --query="SELECT 'Server version: ', version()"
-
-    # Let the server run for a while before checking log.
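# Editor's note (illustrative sketch, not part of the patch): the fixed sleep
# below gives background merges and the replication queue time to surface
# errors in the log before it is scanned; a bounded poll would serve the same
# purpose, e.g.:
#   for _ in $(seq 1 12); do
#       errors=$(rg -c '<Error>' /var/log/clickhouse-server/clickhouse-server.log || echo 0)
#       [ "$errors" -gt 0 ] && break
#       sleep 5
#   done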
- sleep 60 - - stop - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log - - # Error messages (we should ignore some errors) - # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") - # FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server. - # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") - # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected - # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility - echo "Check for Error messages in server log:" - rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ - -e "Code: 236. DB::Exception: Cancelled mutating parts" \ - -e "REPLICA_IS_ALREADY_ACTIVE" \ - -e "REPLICA_ALREADY_EXISTS" \ - -e "ALL_REPLICAS_LOST" \ - -e "DDLWorker: Cannot parse DDL task query" \ - -e "RaftInstance: failed to accept a rpc connection due to error 125" \ - -e "UNKNOWN_DATABASE" \ - -e "NETWORK_ERROR" \ - -e "UNKNOWN_TABLE" \ - -e "ZooKeeperClient" \ - -e "KEEPER_EXCEPTION" \ - -e "DirectoryMonitor" \ - -e "TABLE_IS_READ_ONLY" \ - -e "Code: 1000, e.code() = 111, Connection refused" \ - -e "UNFINISHED" \ - -e "NETLINK_ERROR" \ - -e "Renaming unexpected part" \ - -e "PART_IS_TEMPORARILY_LOCKED" \ - -e "and a merge is impossible: we didn't find" \ - -e "found in queue and some source parts for it was lost" \ - -e "is lost forever." \ - -e "Unknown index: idx." \ - -e "Cannot parse string 'Hello' as UInt64" \ - -e "} TCPHandler: Code:" \ - -e "} executeQuery: Code:" \ - -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "This engine is deprecated and is not supported in transactions" \ - -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ - -e "The set of parts restored in place of" \ - -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ - -e "Code: 269. 
DB::Exception: Destination table is myself" \ - -e "Coordination::Exception: Connection loss" \ - -e "MutateFromLogEntryTask" \ - -e "No connection to ZooKeeper, cannot get shared table ID" \ - -e "Session expired" \ - -e "TOO_MANY_PARTS" \ - -e "Authentication failed" \ - -e "Container already exists" \ - /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ - && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \ - >> /test_output/test_results.tsv \ - || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv - - # Remove file bc_check_error_messages.txt if it's empty - [ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt - - # Sanitizer asserts - rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ - || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv - rm -f /test_output/tmp - - # OOM - rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \ - && echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ - || echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - - # Logical errors - echo "Check for Logical errors in server log:" - rg -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/logical_errors.txt \ - && echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \ - || echo -e "No logical errors$OK" >> /test_output/test_results.tsv - - # Remove file logical_errors.txt if it's empty - [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt - - # Crash - rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.*.log > /dev/null \ - && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ - || echo -e "Not crashed$OK" >> /test_output/test_results.tsv - - # It also checks for crash without stacktrace (printed by watchdog) - echo "Check for Fatal message in server log:" - rg -Fa " " /var/log/clickhouse-server/clickhouse-server.*.log > /test_output/fatal_messages.txt \ - && echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \ - || echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - - # Remove file fatal_messages.txt if it's empty - [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - - rg -Fa "########################################" /test_output/* > /dev/null \ - && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv - - tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: - for table in query_log trace_log - do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q 
"select * from system.$table format TSVWithNamesAndTypes" \ - | zstd --threads=0 > /test_output/$table.tsv.zst ||: - done + echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv + exit fi -dmesg -T > /test_output/dmesg.log +echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv +echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv -# OOM in dmesg -- those are real -grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \ - && echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \ - || echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv +# Make upgrade check more funny by forcing Ordinary engine for system database +mkdir /var/lib/clickhouse/metadata +echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql + +# Install previous release packages +install_packages previous_release_package_folder + +# Start server from previous release +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +# Previous version may not be ready for fault injections +export ZOOKEEPER_FAULT_INJECTION=0 +configure + +# But we still need default disk because some tables loaded only into it +sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
+    > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
+mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+
+start
+
+clickhouse-client --query="SELECT 'Server version: ', version()"
+
+mkdir tmp_stress_output
+
+stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \
+    && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
+    || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
+
+rm -rf tmp_stress_output
+
+# We experienced deadlocks in this command in very rare cases. Let's debug it:
+timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
+(
+    echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
+    timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
+    clickhouse stop --force
+)
+
+# Use bigger timeout for previous version and disable additional hang check
+stop 300 false
+mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
+
+# Install and start new server
+install_packages package_folder
+# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
+export ZOOKEEPER_FAULT_INJECTION=0
+configure
+start 500
+clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
+    || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
+    && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
+    >> /test_output/test_results.tsv)
+
+# Remove file application_errors.txt if it's empty
+[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt
+
+clickhouse-client --query="SELECT 'Server version: ', version()"
+
+# Let the server run for a while before checking log.
+sleep 60
+
+stop
+mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log
+
+# Error messages (we should ignore some errors)
+# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
+# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
+# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server.
+#       Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:")
+# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
+# NOTE  Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected
+#       ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part")
+# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
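# Editor's note (illustrative, not part of the patch): the rg allow-list below
# inverts the match (-v), so a log line survives the scan only if it contains
# none of the ignored fixed-string fragments; a known-benign line can be used
# to sanity-check the filter, e.g.:
#   echo "Code: 236. DB::Exception: Cancelled merging parts" \
#       | rg -Fav -e "Cancelled merging parts" \
#       || echo "benign line correctly filtered out"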
+echo "Check for Error messages in server log:"
+rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
+    -e "Code: 236. DB::Exception: Cancelled mutating parts" \
+    -e "REPLICA_IS_ALREADY_ACTIVE" \
+    -e "REPLICA_ALREADY_EXISTS" \
+    -e "ALL_REPLICAS_LOST" \
+    -e "DDLWorker: Cannot parse DDL task query" \
+    -e "RaftInstance: failed to accept a rpc connection due to error 125" \
+    -e "UNKNOWN_DATABASE" \
+    -e "NETWORK_ERROR" \
+    -e "UNKNOWN_TABLE" \
+    -e "ZooKeeperClient" \
+    -e "KEEPER_EXCEPTION" \
+    -e "DirectoryMonitor" \
+    -e "TABLE_IS_READ_ONLY" \
+    -e "Code: 1000, e.code() = 111, Connection refused" \
+    -e "UNFINISHED" \
+    -e "NETLINK_ERROR" \
+    -e "Renaming unexpected part" \
+    -e "PART_IS_TEMPORARILY_LOCKED" \
+    -e "and a merge is impossible: we didn't find" \
+    -e "found in queue and some source parts for it was lost" \
+    -e "is lost forever." \
+    -e "Unknown index: idx." \
+    -e "Cannot parse string 'Hello' as UInt64" \
+    -e "} TCPHandler: Code:" \
+    -e "} executeQuery: Code:" \
+    -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
+    -e "This engine is deprecated and is not supported in transactions" \
+    -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
+    -e "The set of parts restored in place of" \
+    -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
+    -e "Code: 269. DB::Exception: Destination table is myself" \
+    -e "Coordination::Exception: Connection loss" \
+    -e "MutateFromLogEntryTask" \
+    -e "No connection to ZooKeeper, cannot get shared table ID" \
+    -e "Session expired" \
+    -e "TOO_MANY_PARTS" \
+    -e "Authentication failed" \
+    -e "Container already exists" \
+    /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
+    && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
+        >> /test_output/test_results.tsv \
+    || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv
+
+# Remove file upgrade_error_messages.txt if it's empty
+[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
+
+# Grep logs for sanitizer asserts, crashes and other critical errors
+check_logs_for_critical_errors
+
+tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
+
+collect_query_and_trace_logs
+
+check_oom_in_dmesg
 
 mv /var/log/clickhouse-server/stderr.log /test_output/
 
-# If we failed to clone repo or download previous release packages,
-# we don't have any packages installed, but we need clickhouse-local
-# to be installed to create check_status.tsv.
-if !
command -v clickhouse-local &> /dev/null -then - install_packages package_folder -fi - # Write check result into check_status.tsv # Try to choose most specific error for the whole check status -clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by +clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (test like '%Sanitizer%') DESC, (test like '%Killed by signal%') DESC, (test like '%gdb.log%') DESC, @@ -504,7 +196,7 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes (test like '%Error message%') DESC, (test like '%previous release%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # Core dumps diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib new file mode 100644 index 00000000000..97269dc7d75 --- /dev/null +++ b/tests/ci/stress_tests.lib @@ -0,0 +1,305 @@ +#!/bin/bash + +# core.COMM.PID-TID +sysctl kernel.core_pattern='core.%e.%p-%P' + +OK="\tOK\t\\N\t" +FAIL="\tFAIL\t\\N\t" + +FAILURE_CONTEXT_LINES=50 +FAILURE_CONTEXT_MAX_LINE_WIDTH=400 + +function escaped() +{ + # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. + # Also limit lines width just in case (too long lines are not really useful usually) + clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) + from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" +} +function head_escaped() +{ + head -n $FAILURE_CONTEXT_LINES $1 | escaped +} +function unts() +{ + grep -Po "[0-9][0-9]:[0-9][0-9] \K.*" +} +function trim_server_logs() +{ + head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped +} + +function install_packages() +{ + dpkg -i $1/clickhouse-common-static_*.deb + dpkg -i $1/clickhouse-common-static-dbg_*.deb + dpkg -i $1/clickhouse-server_*.deb + dpkg -i $1/clickhouse-client_*.deb +} + +function configure() +{ + # install test configs + export USE_DATABASE_ORDINARY=1 + export EXPORT_S3_STORAGE_POLICIES=1 + /usr/share/clickhouse-test/config/install.sh + + # avoid too slow startup + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|100000|10000|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml + + # for clickhouse-server (via service) + echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment + # for clickhouse-client + export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' + + # since we run clickhouse from root + sudo chown root: /var/lib/clickhouse + + # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM). 
+    echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
+        > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
+
+    local total_mem
+    total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
+    total_mem=$(( total_mem*1024 )) # bytes
+
+    # Set maximum memory usage as half of total memory (less chance of OOM).
+    #
+    # But not via max_server_memory_usage but via max_memory_usage_for_user,
+    # so that we can override this setting and execute service queries, like:
+    # - hung check
+    # - show/drop database
+    # - ...
+    #
+    # So max_memory_usage_for_user will be a soft limit, and
+    # max_server_memory_usage will be a hard limit, and queries that should be
+    # executed regardless of memory limits will use max_memory_usage_for_user=0
+    # instead of relying on max_untracked_memory
+
+    max_server_memory_usage_to_ram_ratio=0.5
+    echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
+    cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
+<clickhouse>
+    <max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
+</clickhouse>
+EOL
+
+    local max_users_mem
+    max_users_mem=$((total_mem*30/100)) # 30%
+    echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
+    cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
+<clickhouse>
+    <profiles>
+        <default>
+            <max_memory_usage>10G</max_memory_usage>
+            <max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
+        </default>
+    </profiles>
+</clickhouse>
+EOL
+
+    cat > /etc/clickhouse-server/config.d/core.xml <<EOL
+<clickhouse>
+    <core_dump>
+        <!-- 100GiB -->
+        <size_limit_bytes>107374182400</size_limit_bytes>
+    </core_dump>
+    <core_path>$PWD</core_path>
+</clickhouse>
+EOL
+
+    # Analyzer is not yet ready for testing
+    cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
+<clickhouse>
+    <profiles>
+        <default>
+            <constraints>
+                <allow_experimental_analyzer>
+                    <readonly/>
+                </allow_experimental_analyzer>
+            </constraints>
+        </default>
+    </profiles>
+</clickhouse>
+EOL
+
+}
+
+function stop()
+{
+    local max_tries="${1:-90}"
+    local check_hang="${2:-true}"
+    local pid
+    # Preserve the pid, since the server can hang after the PID file is deleted.
+    pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
+
+    clickhouse stop --max-tries "$max_tries" --do-not-kill && return
+
+    if [ "$check_hang" == true ]
+    then
+        # We failed to stop the server with SIGTERM. Maybe it hangs, let's collect stacktraces.
+        echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
+        kill -TERM "$(pidof gdb)" ||:
+        sleep 5
+        echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
+        timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
+        clickhouse stop --force
+    fi
+}
+
+function start()
+{
+    counter=0
+    until clickhouse-client --query "SELECT 1"
+    do
+        if [ "$counter" -gt ${1:-120} ]
+        then
+            echo "Cannot start clickhouse-server"
+            rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
+            echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
+            cat /var/log/clickhouse-server/stdout.log
+            tail -n100 /var/log/clickhouse-server/stderr.log
+            tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n100
+            break
+        fi
+        # use root to match with current uid
+        clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
+        sleep 0.5
+        counter=$((counter + 1))
+    done
+
+    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to the watchdog,
+    # and clickhouse-server can do fork-exec, for example, to run some bridge.
+    # Do not set nostop noprint for all signals, because it may cause gdb to hang;
+    # explicitly ignore non-fatal signals that are used by the server.
+    # The number of SIGRTMIN can be determined only at runtime.
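# Editor's note (illustrative, not part of the patch): SIGRTMIN's numeric
# value depends on how many real-time signals the libc reserves for its own
# use, so the gdb "handle" line for it has to be generated at runtime, e.g.:
#   rtmin=$(kill -l SIGRTMIN)              # typically prints 34 on glibc
#   echo "handle SIG$rtmin nostop noprint pass"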
+ RTMIN=$(kill -l SIGRTMIN) + echo " +set follow-fork-mode parent +handle SIGHUP nostop noprint pass +handle SIGINT nostop noprint pass +handle SIGQUIT nostop noprint pass +handle SIGPIPE nostop noprint pass +handle SIGTERM nostop noprint pass +handle SIGUSR1 nostop noprint pass +handle SIGUSR2 nostop noprint pass +handle SIG$RTMIN nostop noprint pass +info signals +continue +backtrace full +thread apply all backtrace full +info registers +disassemble /s +up +disassemble /s +up +disassemble /s +p \"done\" +detach +quit +" > script.gdb + + # FIXME Hung check may work incorrectly because of attached gdb + # 1. False positives are possible + # 2. We cannot attach another gdb to get stacktraces if some queries hung + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & + sleep 5 + # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) + time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: +} + +function check_server_start() +{ + clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ + || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \ + && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \ + >> /test_output/test_results.tsv) + + # Remove file application_errors.txt if it's empty + [ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt +} + +function check_logs_for_critical_errors() +{ + # Sanitizer asserts + rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp + rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ + && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ + || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv + rm -f /test_output/tmp + + # OOM + rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ + && echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ + || echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv + + # Logical errors + rg -Fa "Code: 49. 
DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \ + && echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \ + || echo -e "No logical errors$OK" >> /test_output/test_results.tsv + # Remove file logical_errors.txt if it's empty + [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt + + # No such key errors + rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ + && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \ + || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv + + # Remove file no_such_key_errors.txt if it's empty + [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt + + # Crash + rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ + && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ + || echo -e "Not crashed$OK" >> /test_output/test_results.tsv + + # It also checks for crash without stacktrace (printed by watchdog) + rg -Fa " " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \ + && echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \ + || echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv + + # Remove file fatal_messages.txt if it's empty + [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt + + rg -Fa "########################################" /test_output/* > /dev/null \ + && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv + + function get_gdb_log_context() + { + rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped + } + + rg -Fa " received signal " /test_output/gdb.log > /dev/null \ + && echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv + + dmesg -T > /test_output/dmesg.log + + # OOM in dmesg -- those are real + grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \ + && echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \ + || echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv +} + +function collect_query_and_trace_logs() +{ + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + done +} + +function collect_core_dumps() +{ + find . 
-type f -maxdepth 1 -name 'core.*' | while read core; do + zstd --threads=0 $core + mv $core.zst /test_output/ + done +} From e52a995e467d08b03de1bdd279141bcac9eed292 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 Feb 2023 16:58:36 +0000 Subject: [PATCH 36/83] Fix style --- docker/test/upgrade/run.sh | 2 ++ tests/ci/stress.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index db1f5ae995f..733d3a79d9f 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -14,6 +14,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag +# Stress tests and upgrade check uses similar code that was placed +# in a separate bash library. See tests/ci/stress_tests.lib source /usr/share/clickhouse-test/ci/stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 0b6bd2349a6..5a46c4cb2bd 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -75,8 +75,8 @@ def run_func_test( for i in range(num_processes) ] pipes = [] - for i in range(0, len(output_paths)): - f = open(output_paths[i], "w") + for i, path in enumerate(output_paths): + f = open(path, "w") full_command = "{} {} {} {} {}".format( cmd, get_options(i, upgrade_check), From ff67fc4b06aff50cf5799118f8cc3d51f498c65a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 Feb 2023 16:59:27 +0000 Subject: [PATCH 37/83] Remove llvm-9 from dependencies --- docker/test/stress/Dockerfile | 1 - docker/test/upgrade/Dockerfile | 1 - 2 files changed, 2 deletions(-) diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 1cabea58a65..e9712f430fd 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -21,7 +21,6 @@ RUN apt-get update -y \ openssl \ netcat-openbsd \ telnet \ - llvm-9 \ brotli \ && apt-get clean diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile index a91088fb01e..8e5890b81a0 100644 --- a/docker/test/upgrade/Dockerfile +++ b/docker/test/upgrade/Dockerfile @@ -21,7 +21,6 @@ RUN apt-get update -y \ openssl \ netcat-openbsd \ telnet \ - llvm-9 \ brotli \ && apt-get clean From e77dd810369ad5fcf957393e4fc71a8a6220b04e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 24 Feb 2023 19:48:46 +0100 Subject: [PATCH 38/83] fix --- src/Dictionaries/RegExpTreeDictionary.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 5775bb3790d..caba2a52a51 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -509,7 +509,8 @@ std::unordered_map RegExpTreeDictionary::match( if (node_ptr->match(reinterpret_cast(keys_data.data()) + offset, length)) { match_result.insertNodeID(node_ptr->id); - if (node_ptr->containsAll(attributes)) + /// When this node is leaf and contains all the required attributes, it means a match. 
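/// Editor's sketch (illustrative, not from the patch): without the
/// children.empty() guard a match on an inner node would stop the scan even
/// though one of its child patterns could still contribute attribute values:
///   node  "Android"               -> sets  name = 'Android'
///   child "Android (\d+[\.\d]*)"  -> sets  version from the capture group
/// Breaking on the inner match alone would leave `version` unset.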
+ if (node_ptr->containsAll(attributes) && node_ptr->children.empty()) break; } } From da326dda03069b56033f6d310b41f0c78c289d19 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 24 Feb 2023 19:19:51 +0000 Subject: [PATCH 39/83] comments fix --- src/Common/SettingsChanges.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/SettingsChanges.h b/src/Common/SettingsChanges.h index 61f5a1c7eba..514e9a78911 100644 --- a/src/Common/SettingsChanges.h +++ b/src/Common/SettingsChanges.h @@ -29,11 +29,11 @@ public: const Field * tryGet(std::string_view name) const; Field * tryGet(std::string_view name); - /// Inserts element if doesn't exists and returns true, otherwise just returns false + /// Inserts element if doesn't exists and returns true, otherwise just returns false bool insertSetting(std::string_view name, const Field & value); /// Sets element to value, inserts if doesn't exist void setSetting(std::string_view name, const Field & value); - /// If element exists - removes it and returns true, otherwise returns false + /// If element exists - removes it and returns true, otherwise returns false bool removeSetting(std::string_view name); }; From dce319e1c5752c58d222b79d8c5a3c17761721c8 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 20 Feb 2023 21:30:57 +0100 Subject: [PATCH 40/83] add multitreading for StorageSystemDetachedParts --- .../System/StorageSystemDetachedParts.cpp | 300 +++++++++++++----- 1 file changed, 227 insertions(+), 73 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 494f9c9c31f..2da3fb3b988 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -8,10 +8,218 @@ #include #include #include +#include +#include + +#include namespace DB { +namespace +{ + +static void partFilesOnDiskImpl(const DiskPtr & disk, const String & from, std::vector & files) +{ + if (disk->isFile(from)) + { + files.push_back(from); + } + else + { + for (auto it = disk->iterateDirectory(from); it->isValid(); it->next()) + partFilesOnDiskImpl(disk, fs::path(from) / it->name(), files); + } +} + +static std::vector partFilesOnDisk(const DiskPtr & disk, const String & from) +{ + std::vector files; + try + { + partFilesOnDiskImpl(disk, from, files); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + return files; +} + +class SourceState +{ + std::mutex mutex; + StoragesInfoStream stream; + +public: + SourceState(StoragesInfoStream && stream_) + : stream(std::move(stream_)) + {} + + StoragesInfo next() + { + std::lock_guard lock(mutex); + return stream.next(); + } +}; + +class DetachedPartsSource : public ISource +{ +public: + DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_, + bool has_bytes_on_disk_column_) + : ISource(std::move(header_)) + , state(state_) + , columns_mask(std::move(columns_mask_)) + , block_size(block_size_) + , has_bytes_on_disk_column(has_bytes_on_disk_column_) + {} + + String getName() const override { return "DataPartsSource"; } + +protected: + Chunk generate() override + { + Chunk result; + + while (result.getNumRows() < block_size) + { + if (detached_parts.empty()) + get_more_parts(); + + if (detached_parts.empty()) + { + progress(result.getNumRows(), result.bytes()); + return result; + } + + Chunk chunk = generate_chunk(block_size - result.getNumRows()); + + if (result) + result.append(chunk); + else + result = std::move(chunk); + } + + progress(result.getNumRows(), result.bytes()); + return result; + } + +private: + std::shared_ptr state; + const std::vector columns_mask; + const UInt64 block_size; + const bool has_bytes_on_disk_column; + + StoragesInfo current_info; + DetachedPartsInfo detached_parts; + + void get_more_parts() + { + chassert(detached_parts.empty()); + + current_info = state->next(); + if (!current_info) + return; + + detached_parts = current_info.data->getDetachedParts(); + } + + Chunk generate_chunk(size_t max_rows) + { + chassert(current_info); + + auto rows = std::min(max_rows, detached_parts.size()); + auto begin = detached_parts.size() - rows; + + std::vector> parts_sizes(rows); + + if (has_bytes_on_disk_column) + { + std::vector> futures; + SCOPE_EXIT_SAFE({ + /// Exceptions are not propagated + for (auto & future : futures) + if (future.valid()) + future.wait(); + futures.clear(); + }); + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & p = detached_parts.at(p_id); + DiskPtr & disk = p.disk; + + const String part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; + const String relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + + std::vector listing = partFilesOnDisk(disk, relative_path); + chassert(listing.size() > 0); + auto * counter = &parts_sizes[p_id - begin]; + for (const auto & file : listing) + { + futures.push_back( + scheduleFromThreadPool( + [disk, file, counter] () + { + size_t size = disk->getFileSize(file); + counter->fetch_add(size); + }, + IOThreadPool::get(), + "DP_BytesOnDisk")); + } + } + + /// Exceptions are propagated + for (auto & future : futures) + future.get(); + } + + MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns(); + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & p = detached_parts.at(p_id); + + size_t src_index = 0; + size_t res_index = 0; + String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(current_info.database); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(current_info.table); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? 
p.partition_id : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.dir_name); + if (columns_mask[src_index++]) + { + chassert(has_bytes_on_disk_column); + size_t bytes_on_disk = parts_sizes.at(p_id - begin).load(); + new_columns[res_index++]->insert(bytes_on_disk); + } + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.disk->getName()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert((fs::path(current_info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.level : Field()); + } + + detached_parts.resize(begin); + + return {std::move(new_columns), rows}; + } +}; + +} + StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_id_) : IStorage(table_id_) { @@ -31,33 +239,6 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i }}); setInMemoryMetadata(storage_metadata); } -static void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size) -{ - /// Files or directories of detached part may not exist. Only count the size of existing files. - if (disk->isFile(from)) - { - total_size += disk->getFileSize(from); - } - else - { - for (auto it = disk->iterateDirectory(from); it->isValid(); it->next()) - calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size); - } -} - -static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from) -{ - UInt64 total_size = 0; - try - { - calculateTotalSizeOnDiskImpl(disk, from, total_size); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - return total_size; -} Pipe StorageSystemDetachedParts::read( const Names & column_names, @@ -65,66 +246,39 @@ Pipe StorageSystemDetachedParts::read( SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const size_t /*num_streams*/) + const size_t max_block_size, + const size_t num_streams) { storage_snapshot->check(column_names); - - StoragesInfoStream stream(query_info, context); - - /// Create the result. 
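// Editor's note (illustrative): both the removed code below and its
// replacement rely on the columns_mask idiom shared by system tables --
// compute the mask of requested columns once, then emit values only for the
// marked positions, e.g.:
//   std::vector<UInt8> mask(sample.columns());
//   for (size_t i = 0; i < sample.columns(); ++i)
//       if (requested.contains(sample.getByPosition(i).name))
//       {
//           mask[i] = 1;
//           header.insert(sample.getByPosition(i));
//       }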
- Block block = storage_snapshot->metadata->getSampleBlock(); + Block sample_block = storage_snapshot->metadata->getSampleBlock(); NameSet names_set(column_names.begin(), column_names.end()); - std::vector columns_mask(block.columns()); - Block header; - for (size_t i = 0; i < block.columns(); ++i) + Block header; + std::vector columns_mask(sample_block.columns()); + + for (size_t i = 0; i < columns_mask.size(); ++i) { - if (names_set.contains(block.getByPosition(i).name)) + if (names_set.contains(sample_block.getByPosition(i).name)) { columns_mask[i] = 1; - header.insert(block.getByPosition(i)); + header.insert(sample_block.getByPosition(i)); } } - MutableColumns new_columns = header.cloneEmptyColumns(); - while (StoragesInfo info = stream.next()) + bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk"); + + auto state = std::make_shared(StoragesInfoStream(query_info, context)); + + Pipe pipe; + + for (size_t i = 0; i < num_streams; ++i) { - const auto parts = info.data->getDetachedParts(); - for (const auto & p : parts) - { - size_t src_index = 0, res_index = 0; - String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(info.database); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(info.table); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.dir_name); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.disk->getName()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? 
p.level : Field()); - } + auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column); + pipe.addSource(std::move(source)); } - UInt64 num_rows = new_columns.at(0)->size(); - Chunk chunk(std::move(new_columns), num_rows); - - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + return pipe; } } From 8abc1f0d5deaf6442c0bda4e5a4b19b8f5411da1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 Feb 2023 11:37:13 +0100 Subject: [PATCH 41/83] fix build and style --- .../System/StorageSystemDetachedParts.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 2da3fb3b988..0b92dc0dd3c 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -19,7 +19,7 @@ namespace DB namespace { -static void partFilesOnDiskImpl(const DiskPtr & disk, const String & from, std::vector & files) +void partFilesOnDiskImpl(const DiskPtr & disk, const String & from, std::vector & files) { if (disk->isFile(from)) { @@ -32,7 +32,7 @@ static void partFilesOnDiskImpl(const DiskPtr & disk, const String & from, std:: } } -static std::vector partFilesOnDisk(const DiskPtr & disk, const String & from) +std::vector partFilesOnDisk(const DiskPtr & disk, const String & from) { std::vector files; try @@ -85,7 +85,7 @@ protected: while (result.getNumRows() < block_size) { if (detached_parts.empty()) - get_more_parts(); + getMoreParts(); if (detached_parts.empty()) { @@ -93,7 +93,7 @@ protected: return result; } - Chunk chunk = generate_chunk(block_size - result.getNumRows()); + Chunk chunk = generateChunk(block_size - result.getNumRows()); if (result) result.append(chunk); @@ -114,7 +114,7 @@ private: StoragesInfo current_info; DetachedPartsInfo detached_parts; - void get_more_parts() + void getMoreParts() { chassert(detached_parts.empty()); @@ -125,7 +125,7 @@ private: detached_parts = current_info.data->getDetachedParts(); } - Chunk generate_chunk(size_t max_rows) + Chunk generateChunk(size_t max_rows) { chassert(current_info); @@ -152,10 +152,9 @@ private: const String part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; const String relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + auto * counter = &parts_sizes[p_id - begin]; std::vector listing = partFilesOnDisk(disk, relative_path); - chassert(listing.size() > 0); - auto * counter = &parts_sizes[p_id - begin]; for (const auto & file : listing) { futures.push_back( From 3b582493204ea26c7a3b48b2a18d7e1a86c9af96 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 Feb 2023 16:19:28 +0100 Subject: [PATCH 42/83] pack several files in one thread --- .../System/StorageSystemDetachedParts.cpp | 89 ++++++++++--------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 0b92dc0dd3c..6e40a3a3130 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -110,6 +110,7 @@ private: const std::vector columns_mask; const UInt64 block_size; const bool has_bytes_on_disk_column; + static const size_t files_peer_thread = 15; StoragesInfo current_info; DetachedPartsInfo detached_parts; @@ -125,6 +126,53 @@ private: detached_parts = current_info.data->getDetachedParts(); } + 
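// Editor's note (illustrative, not from the patch): the per-part byte
// counters in the function below are std::atomic<size_t> because several
// thread-pool tasks may fetch_add file sizes for the same part concurrently;
// for a counter that is only read after all futures are joined, a relaxed
// increment would already be sufficient:
//   counter->fetch_add(size, std::memory_order_relaxed);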
void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) + { + if (!has_bytes_on_disk_column) + return; + + std::vector> futures; + SCOPE_EXIT_SAFE({ + /// Exceptions are not propagated + for (auto & future : futures) + if (future.valid()) + future.wait(); + futures.clear(); + }); + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & p = detached_parts.at(p_id); + DiskPtr & disk = p.disk; + + const String part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; + const String relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + std::vector listing = partFilesOnDisk(disk, relative_path); + + auto * counter = &parts_sizes[p_id - begin]; + for (size_t slice_begin = 0; slice_begin < listing.size(); slice_begin += files_peer_thread) + { + size_t slice_end = std::min(slice_begin + files_peer_thread, listing.size()); + auto slice = std::vector(listing.begin() + slice_begin, listing.begin() + slice_end); + futures.push_back( + scheduleFromThreadPool( + [disk, counter, files = std::move(slice)] () + { + size_t partial_files_size = 0; + for (const auto & file : files) + partial_files_size += disk->getFileSize(file); + counter->fetch_add(partial_files_size); + }, + IOThreadPool::get(), + "DP_BytesOnDisk")); + } + } + + /// Exceptions are propagated + for (auto & future : futures) + future.get(); + } + Chunk generateChunk(size_t max_rows) { chassert(current_info); @@ -133,46 +181,7 @@ private: auto begin = detached_parts.size() - rows; std::vector> parts_sizes(rows); - - if (has_bytes_on_disk_column) - { - std::vector> futures; - SCOPE_EXIT_SAFE({ - /// Exceptions are not propagated - for (auto & future : futures) - if (future.valid()) - future.wait(); - futures.clear(); - }); - - for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) - { - auto & p = detached_parts.at(p_id); - DiskPtr & disk = p.disk; - - const String part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - const String relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; - auto * counter = &parts_sizes[p_id - begin]; - - std::vector listing = partFilesOnDisk(disk, relative_path); - for (const auto & file : listing) - { - futures.push_back( - scheduleFromThreadPool( - [disk, file, counter] () - { - size_t size = disk->getFileSize(file); - counter->fetch_add(size); - }, - IOThreadPool::get(), - "DP_BytesOnDisk")); - } - } - - /// Exceptions are propagated - for (auto & future : futures) - future.get(); - } + calculatePartSizeOnDisk(begin, parts_sizes); MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns(); From ef0c1841af125641b6822b899b22bee897f073ca Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 Feb 2023 22:55:58 +0100 Subject: [PATCH 43/83] work with comments on review --- .../System/StorageSystemDetachedParts.cpp | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 6e40a3a3130..2fae788298c 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -78,31 +78,34 @@ public: String getName() const override { return "DataPartsSource"; } protected: + Chunk nullWhenNoRows(MutableColumns && new_columns) + { + chassert(!new_columns.empty()); + const auto rows = new_columns[0]->size(); + + if (!rows) + return {}; + + return {std::move(new_columns), rows}; + } + Chunk 
generate() override { - Chunk result; + MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns(); + chassert(!new_columns.empty()); - while (result.getNumRows() < block_size) + while (new_columns[0]->size() < block_size) { if (detached_parts.empty()) getMoreParts(); if (detached_parts.empty()) - { - progress(result.getNumRows(), result.bytes()); - return result; - } + return nullWhenNoRows(std::move(new_columns)); - Chunk chunk = generateChunk(block_size - result.getNumRows()); - - if (result) - result.append(chunk); - else - result = std::move(chunk); + generateRows(new_columns, block_size - new_columns[0]->size()); } - progress(result.getNumRows(), result.bytes()); - return result; + return nullWhenNoRows(std::move(new_columns)); } private: @@ -110,7 +113,7 @@ private: const std::vector columns_mask; const UInt64 block_size; const bool has_bytes_on_disk_column; - static const size_t files_peer_thread = 15; + static const size_t files_peer_thread = 30; StoragesInfo current_info; DetachedPartsInfo detached_parts; @@ -173,7 +176,7 @@ private: future.get(); } - Chunk generateChunk(size_t max_rows) + void generateRows(MutableColumns & new_columns, size_t max_rows) { chassert(current_info); @@ -183,8 +186,6 @@ private: std::vector> parts_sizes(rows); calculatePartSizeOnDisk(begin, parts_sizes); - MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns(); - for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { auto & p = detached_parts.at(p_id); @@ -221,8 +222,6 @@ private: } detached_parts.resize(begin); - - return {std::move(new_columns), rows}; } }; From e5cbe4311eaae25d91aa5470690baf913029f17e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 22 Feb 2023 14:14:25 +0100 Subject: [PATCH 44/83] create limites count of support threads for a block --- .../System/StorageSystemDetachedParts.cpp | 76 ++++++++++++------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 2fae788298c..9ca0c921358 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -52,7 +52,7 @@ class SourceState StoragesInfoStream stream; public: - SourceState(StoragesInfoStream && stream_) + explicit SourceState(StoragesInfoStream && stream_) : stream(std::move(stream_)) {} @@ -78,7 +78,7 @@ public: String getName() const override { return "DataPartsSource"; } protected: - Chunk nullWhenNoRows(MutableColumns && new_columns) + static Chunk nullWhenNoRows(MutableColumns && new_columns) { chassert(!new_columns.empty()); const auto rows = new_columns[0]->size(); @@ -113,7 +113,7 @@ private: const std::vector columns_mask; const UInt64 block_size; const bool has_bytes_on_disk_column; - static const size_t files_peer_thread = 30; + static const size_t support_threads = 35; StoragesInfo current_info; DetachedPartsInfo detached_parts; @@ -134,6 +134,36 @@ private: if (!has_bytes_on_disk_column) return; + struct Task + { + DiskPtr disk; + String file; + std::atomic * counter = nullptr; + }; + + struct SharedState + { + std::vector tasks; + std::atomic next_task = {0}; + }; + + SharedState shared_state; + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & p = detached_parts.at(p_id); + auto & disk = p.disk; + + auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; + auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + auto 
listing = partFilesOnDisk(disk, relative_path); + + auto * counter = &parts_sizes[p_id - begin]; + + for (auto & file : listing) + shared_state.tasks.push_back({disk, file, counter}); + } + std::vector> futures; SCOPE_EXIT_SAFE({ /// Exceptions are not propagated @@ -143,32 +173,21 @@ private: futures.clear(); }); - for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + for (size_t i = 0; i < support_threads; ++i) { - auto & p = detached_parts.at(p_id); - DiskPtr & disk = p.disk; - - const String part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - const String relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; - std::vector listing = partFilesOnDisk(disk, relative_path); - - auto * counter = &parts_sizes[p_id - begin]; - for (size_t slice_begin = 0; slice_begin < listing.size(); slice_begin += files_peer_thread) - { - size_t slice_end = std::min(slice_begin + files_peer_thread, listing.size()); - auto slice = std::vector(listing.begin() + slice_begin, listing.begin() + slice_end); - futures.push_back( - scheduleFromThreadPool( - [disk, counter, files = std::move(slice)] () + futures.push_back( + scheduleFromThreadPool( + [&shared_state] () + { + for (auto id = shared_state.next_task++; id < shared_state.tasks.size(); id = shared_state.next_task++) { - size_t partial_files_size = 0; - for (const auto & file : files) - partial_files_size += disk->getFileSize(file); - counter->fetch_add(partial_files_size); - }, - IOThreadPool::get(), - "DP_BytesOnDisk")); - } + auto & task = shared_state.tasks.at(id); + auto size = task.disk->getFileSize(task.file); + task.counter->fetch_add(size); + } + }, + IOThreadPool::get(), + "DP_BytesOnDisk")); } /// Exceptions are propagated @@ -192,7 +211,6 @@ private: size_t src_index = 0; size_t res_index = 0; - String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; if (columns_mask[src_index++]) new_columns[res_index++]->insert(current_info.database); if (columns_mask[src_index++]) @@ -210,7 +228,7 @@ private: if (columns_mask[src_index++]) new_columns[res_index++]->insert(p.disk->getName()); if (columns_mask[src_index++]) - new_columns[res_index++]->insert((fs::path(current_info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); + new_columns[res_index++]->insert((fs::path(current_info.data->getFullPathOnDisk(p.disk)) / MergeTreeData::DETACHED_DIR_NAME / p.dir_name).string()); if (columns_mask[src_index++]) new_columns[res_index++]->insert(p.valid_name ? 
p.prefix : Field()); if (columns_mask[src_index++]) From 08dc874a37fdb22827794e152eba212791718dee Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 22 Feb 2023 16:21:08 +0100 Subject: [PATCH 45/83] do not run support threads if no tasks left --- .../System/StorageSystemDetachedParts.cpp | 64 ++++++++++--------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 9ca0c921358..f03e1542b03 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -63,6 +63,19 @@ public: } }; +struct WorkerState +{ + struct Task + { + DiskPtr disk; + String file; + std::atomic * counter = nullptr; + }; + + std::vector tasks; + std::atomic next_task = {0}; +}; + class DetachedPartsSource : public ISource { public: @@ -113,7 +126,7 @@ private: const std::vector columns_mask; const UInt64 block_size; const bool has_bytes_on_disk_column; - static const size_t support_threads = 35; + const size_t support_threads = 35; StoragesInfo current_info; DetachedPartsInfo detached_parts; @@ -134,20 +147,7 @@ private: if (!has_bytes_on_disk_column) return; - struct Task - { - DiskPtr disk; - String file; - std::atomic * counter = nullptr; - }; - - struct SharedState - { - std::vector tasks; - std::atomic next_task = {0}; - }; - - SharedState shared_state; + WorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { @@ -161,7 +161,7 @@ private: auto * counter = &parts_sizes[p_id - begin]; for (auto & file : listing) - shared_state.tasks.push_back({disk, file, counter}); + worker_state.tasks.push_back({disk, file, counter}); } std::vector> futures; @@ -173,21 +173,27 @@ private: futures.clear(); }); - for (size_t i = 0; i < support_threads; ++i) + auto max_thread_to_run = std::min(support_threads, worker_state.tasks.size() / 10); + for (size_t i = 0; i < max_thread_to_run; ++i) { + if (worker_state.next_task.load() >= worker_state.tasks.size()) + break; + + auto worker = [&worker_state] () + { + for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++) + { + auto & task = worker_state.tasks.at(id); + auto size = task.disk->getFileSize(task.file); + task.counter->fetch_add(size); + } + }; + futures.push_back( - scheduleFromThreadPool( - [&shared_state] () - { - for (auto id = shared_state.next_task++; id < shared_state.tasks.size(); id = shared_state.next_task++) - { - auto & task = shared_state.tasks.at(id); - auto size = task.disk->getFileSize(task.file); - task.counter->fetch_add(size); - } - }, - IOThreadPool::get(), - "DP_BytesOnDisk")); + scheduleFromThreadPool( + std::move(worker), + IOThreadPool::get(), + "DP_BytesOnDisk")); } /// Exceptions are propagated From e2bfa834297841d1c1d7d0b44ff66e05a9b3b822 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 23 Feb 2023 12:00:56 +0100 Subject: [PATCH 46/83] cancel workers when exception --- src/Storages/System/StorageSystemDetachedParts.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f03e1542b03..f8ca31433f0 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -166,6 +166,8 @@ private: std::vector> futures; SCOPE_EXIT_SAFE({ + /// Cancel all workers + 
worker_state.next_task.store(worker_state.tasks.size()); /// Exceptions are not propagated for (auto & future : futures) if (future.valid()) From 57b5f9e7d7662d12988c8ad4f7c05a8b0200e600 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 Feb 2023 18:14:52 +0000 Subject: [PATCH 47/83] Fix case with no detached parts for table. --- src/Storages/System/StorageSystemDetachedParts.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f8ca31433f0..7c631b12e04 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -135,11 +135,14 @@ private: { chassert(detached_parts.empty()); - current_info = state->next(); - if (!current_info) - return; + while (detached_parts.empty()) + { + current_info = state->next(); + if (!current_info) + return; - detached_parts = current_info.data->getDetachedParts(); + detached_parts = current_info.data->getDetachedParts(); + } } void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) From 0c9643f1ed7dc13ee0d7c96d3de8f03db5e1beed Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sat, 25 Feb 2023 16:27:27 +0100 Subject: [PATCH 48/83] make listing in parrallel as well --- .../System/StorageSystemDetachedParts.cpp | 89 +++++++++++++++++-- 1 file changed, 80 insertions(+), 9 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 7c631b12e04..f456eabd0f5 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -63,7 +63,7 @@ public: } }; -struct WorkerState +struct FileSizeWorkerState { struct Task { @@ -76,6 +76,19 @@ struct WorkerState std::atomic next_task = {0}; }; +struct ListingWorkerState +{ + struct Task + { + DiskPtr disk; + String path; + std::vector & files; + }; + + std::vector tasks; + std::atomic next_task = {0}; +}; + class DetachedPartsSource : public ISource { public: @@ -145,25 +158,81 @@ private: } } + std::vector> collectListings(size_t begin) + { + std::vector> listings(detached_parts.size() - begin); + + ListingWorkerState worker_state; + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & part = detached_parts[p_id]; + + auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / part.dir_name; + auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + + worker_state.tasks.push_back({part.disk, relative_path, listings.at(p_id - begin)}); + } + + std::vector> futures; + SCOPE_EXIT_SAFE({ + /// Cancel all workers + worker_state.next_task.store(worker_state.tasks.size()); + /// Exceptions are not propagated + for (auto & future : futures) + if (future.valid()) + future.wait(); + futures.clear(); + }); + + auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, detached_parts.size() / 10)); + futures.reserve(max_thread_to_run); + + for (size_t i = 0; i < max_thread_to_run; ++i) + { + if (worker_state.next_task.load() >= worker_state.tasks.size()) + break; + + auto worker = [&worker_state] () + { + for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++) + { + auto & task = worker_state.tasks.at(id); + task.files = partFilesOnDisk(task.disk, task.path); + } + }; + + futures.push_back( + scheduleFromThreadPool( + std::move(worker), + IOThreadPool::get(), + 
"DP_BytesOnDisk")); + } + + /// Exceptions are propagated + for (auto & future : futures) + future.get(); + + return listings; + + } + void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) { if (!has_bytes_on_disk_column) return; - WorkerState worker_state; + auto listings = collectListings(begin); + + FileSizeWorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { auto & p = detached_parts.at(p_id); auto & disk = p.disk; - - auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; - auto listing = partFilesOnDisk(disk, relative_path); - auto * counter = &parts_sizes[p_id - begin]; - for (auto & file : listing) + for (auto & file : listings[p_id - begin]) worker_state.tasks.push_back({disk, file, counter}); } @@ -178,7 +247,9 @@ private: futures.clear(); }); - auto max_thread_to_run = std::min(support_threads, worker_state.tasks.size() / 10); + auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, worker_state.tasks.size() / 10)); + futures.reserve(max_thread_to_run); + for (size_t i = 0; i < max_thread_to_run; ++i) { if (worker_state.next_task.load() >= worker_state.tasks.size()) From 15ece2ab16b69ee04a71ce0de55b2ca14001b5c1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 25 Feb 2023 19:26:16 +0000 Subject: [PATCH 49/83] Fix incorrect LIKE-to-substring translation To be backported --- src/Functions/MatchImpl.h | 21 +++++++++++++++++-- .../25339_like_substring_search_bug.reference | 1 + .../25339_like_substring_search_bug.sql | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/25339_like_substring_search_bug.reference create mode 100644 tests/queries/0_stateless/25339_like_substring_search_bug.sql diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index db8dd55474e..fe38ac49d62 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -23,9 +23,10 @@ namespace ErrorCodes namespace impl { -/// Is the [I]LIKE expression reduced to finding a substring in a string? +/// Is the [I]LIKE expression equivalent to a substring search? 
inline bool likePatternIsSubstring(std::string_view pattern, String & res) { + /// TODO: ignore multiple leading or trailing % if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%')) return false; @@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res) case '\\': ++pos; if (pos == end) + /// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search return false; else - res += *pos; + { + switch (*pos) + { + /// Known LIKE escape sequences: + case '%': + case '_': + case '\\': + res += *pos; + break; + /// For all other escape sequences, the backslash loses its special meaning + default: + res += '\\'; + res += *pos; + break; + } + } break; default: res += *pos; diff --git a/tests/queries/0_stateless/25339_like_substring_search_bug.reference b/tests/queries/0_stateless/25339_like_substring_search_bug.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/25339_like_substring_search_bug.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/25339_like_substring_search_bug.sql b/tests/queries/0_stateless/25339_like_substring_search_bug.sql new file mode 100644 index 00000000000..10ce3cdde36 --- /dev/null +++ b/tests/queries/0_stateless/25339_like_substring_search_bug.sql @@ -0,0 +1 @@ +SELECT 'Win\Sys' LIKE '%Win\Sys%'; From 1666527dae0f14d8884cd94837629cba04a60251 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sat, 25 Feb 2023 22:20:00 +0100 Subject: [PATCH 50/83] do listing combined with getting size in one thread --- .../System/StorageSystemDetachedParts.cpp | 108 +++--------------- 1 file changed, 17 insertions(+), 91 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f456eabd0f5..66e610ca653 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -19,31 +19,32 @@ namespace DB namespace { -void partFilesOnDiskImpl(const DiskPtr & disk, const String & from, std::vector & files) +void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size) { + /// Files or directories of detached part may not exist. Only count the size of existing files. if (disk->isFile(from)) { - files.push_back(from); + total_size += disk->getFileSize(from); } else { for (auto it = disk->iterateDirectory(from); it->isValid(); it->next()) - partFilesOnDiskImpl(disk, fs::path(from) / it->name(), files); + calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size); } } -std::vector partFilesOnDisk(const DiskPtr & disk, const String & from) +UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from) { - std::vector files; + UInt64 total_size = 0; try { - partFilesOnDiskImpl(disk, from, files); + calculateTotalSizeOnDiskImpl(disk, from, total_size); } catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); } - return files; + return total_size; } class SourceState @@ -63,26 +64,14 @@ public: } }; -struct FileSizeWorkerState -{ - struct Task - { - DiskPtr disk; - String file; - std::atomic * counter = nullptr; - }; - std::vector tasks; - std::atomic next_task = {0}; -}; - -struct ListingWorkerState +struct WorkerState { struct Task { DiskPtr disk; String path; - std::vector & files; + std::atomic * counter = nullptr; }; std::vector tasks; @@ -158,82 +147,19 @@ private: } } - std::vector> collectListings(size_t begin) - { - std::vector> listings(detached_parts.size() - begin); - - ListingWorkerState worker_state; - - for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) - { - auto & part = detached_parts[p_id]; - - auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / part.dir_name; - auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; - - worker_state.tasks.push_back({part.disk, relative_path, listings.at(p_id - begin)}); - } - - std::vector> futures; - SCOPE_EXIT_SAFE({ - /// Cancel all workers - worker_state.next_task.store(worker_state.tasks.size()); - /// Exceptions are not propagated - for (auto & future : futures) - if (future.valid()) - future.wait(); - futures.clear(); - }); - - auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, detached_parts.size() / 10)); - futures.reserve(max_thread_to_run); - - for (size_t i = 0; i < max_thread_to_run; ++i) - { - if (worker_state.next_task.load() >= worker_state.tasks.size()) - break; - - auto worker = [&worker_state] () - { - for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++) - { - auto & task = worker_state.tasks.at(id); - task.files = partFilesOnDisk(task.disk, task.path); - } - }; - - futures.push_back( - scheduleFromThreadPool( - std::move(worker), - IOThreadPool::get(), - "DP_BytesOnDisk")); - } - - /// Exceptions are propagated - for (auto & future : futures) - future.get(); - - return listings; - - } - void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) { if (!has_bytes_on_disk_column) return; - auto listings = collectListings(begin); - - FileSizeWorkerState worker_state; + WorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { - auto & p = detached_parts.at(p_id); - auto & disk = p.disk; - auto * counter = &parts_sizes[p_id - begin]; - - for (auto & file : listings[p_id - begin]) - worker_state.tasks.push_back({disk, file, counter}); + auto & part = detached_parts[p_id]; + auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / part.dir_name; + auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + worker_state.tasks.push_back({part.disk, relative_path, &parts_sizes.at(p_id - begin)}); } std::vector> futures; @@ -260,8 +186,8 @@ private: for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++) { auto & task = worker_state.tasks.at(id); - auto size = task.disk->getFileSize(task.file); - task.counter->fetch_add(size); + size_t size = calculateTotalSizeOnDisk(task.disk, task.path); + task.counter->store(size); } }; From cc0c0c6133ee62e63a3c42ceb6c285a2d8f5611f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 26 Feb 2023 20:01:35 +0000 Subject: [PATCH 51/83] A better alternative to #46344 The (experimental) inverted index writes/reads files different from the standard files written by the other skip indexes. 
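(Concretely, each inverted index adds four extra files per part, with the extensions
.gin_dict, .gin_post, .gin_seg and .gin_sid; the deletion list removed from
DataPartStorageOnDiskBase.cpp below shows them hardcoded for the index 'af'.)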
The original problem was that with database engine "ordinary", DROP TABLE of a table with inverted index finds unknown files in persistence and complains. The same will happen with engine "atomic" but deferred. As a hotfix, the error was silenced by explicitly adding the four files created in a specific test to the deletion code. This PR tries a cleaner solution where all needed files are provided via the normal checksum structure. One drawback remains which is that the affected files were written earlier and we don't have their checksums available. Therefore, the inverted index is currently excluded from CHECK TABLE. Minimal repro: SET allow_experimental_inverted_index = 1; DROP TABLE IF EXISTS tab; CREATE TABLE tab(s String, INDEX af(s) TYPE inverted(2)) ENGINE = MergeTree() ORDER BY s; INSERT INTO tab VALUES ('Alick a01'); CHECK TABLE tab; DROP TABLE IF EXISTS tab; run ./clickhouse-test with --db-engine Ordinary --- .../MergeTree/DataPartStorageOnDiskBase.cpp | 6 ----- .../MergeTree/MergeTreeDataPartChecksum.cpp | 4 ++++ .../MergeTreeDataPartWriterOnDisk.cpp | 22 ++++++++++++++----- src/Storages/MergeTree/checkDataPart.cpp | 6 ++++- .../02346_full_text_search.reference | 1 + .../0_stateless/02346_full_text_search.sql | 3 +++ 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index d8769a94347..175df9b6e28 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -684,12 +684,6 @@ void DataPartStorageOnDiskBase::clearDirectory( request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true); request.emplace_back(fs::path(dir) / "txn_version.txt", true); - /// Inverted index - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true); - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true); - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true); - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true); - disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove); disk->removeDirectory(dir); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 719a60b2f31..8f4d066baa3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r { const String & name = it.first; + /// Exclude files written by inverted index from check. No correct checksums are available for them currently. 
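+        /// (This extension list mirrors the files registered for the inverted index in
+        /// MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(); keep both lists in sync.)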
+ if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid")) + continue; + auto jt = rhs.files.find(name); if (jt == rhs.files.end()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 1dec7c2cd7c..fe5dbcefabf 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); - for (const auto & index_helper : skip_indices) + for (const auto & skip_index : skip_indices) { - String stream_name = index_helper->getFileName(); + String stream_name = skip_index->getFileName(); skip_indices_streams.emplace_back( std::make_unique( stream_name, data_part->getDataPartStoragePtr(), - stream_name, index_helper->getSerializedFileExtension(), + stream_name, skip_index->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, marks_compression_codec, settings.marks_compress_block_size, settings.query_write_settings)); GinIndexStorePtr store = nullptr; - if (dynamic_cast(&*index_helper) != nullptr) + if (dynamic_cast(&*skip_index) != nullptr) { store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } - skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store)); + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store)); skip_index_accumulated_marks.push_back(0); } } @@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data auto & stream = *skip_indices_streams[i]; if (!skip_indices_aggregators[i]->empty()) skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing); + + /// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown + /// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and + /// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files. 
+ if (dynamic_cast(&*skip_indices[i]) != nullptr) + { + String filename_without_extension = skip_indices[i]->getFileName(); + checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum(); + checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum(); + checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum(); + checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum(); + } } for (auto & stream : skip_indices_streams) diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 2ec83d99eeb..3a33572d047 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -163,12 +163,16 @@ IMergeTreeDataPart::Checksums checkDataPart( auto file_name = it->name(); /// We will check projections later. - if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj")) + if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj")) { projections_on_disk.insert(file_name); continue; } + /// Exclude files written by inverted index from check. No correct checksums are available for them currently. + if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid")) + continue; + auto checksum_it = checksums_data.files.find(file_name); /// Skip files that we already calculated. Also skip metadata files that are not checksummed. diff --git a/tests/queries/0_stateless/02346_full_text_search.reference b/tests/queries/0_stateless/02346_full_text_search.reference index e035e93867b..f1e21e511d0 100644 --- a/tests/queries/0_stateless/02346_full_text_search.reference +++ b/tests/queries/0_stateless/02346_full_text_search.reference @@ -1,4 +1,5 @@ af inverted +1 101 Alick a01 1 101 Alick a01 diff --git a/tests/queries/0_stateless/02346_full_text_search.sql b/tests/queries/0_stateless/02346_full_text_search.sql index af49c5d52c2..2b10800e78f 100644 --- a/tests/queries/0_stateless/02346_full_text_search.sql +++ b/tests/queries/0_stateless/02346_full_text_search.sql @@ -15,6 +15,9 @@ INSERT INTO tab VALUES (101, 'Alick a01'), (102, 'Blick a02'), (103, 'Click a03' -- check inverted index was created SELECT name, type FROM system.data_skipping_indices WHERE table =='tab' AND database = currentDatabase() LIMIT 1; +-- throw in a random consistency check +CHECK TABLE tab; + -- search inverted index with == SELECT * FROM tab WHERE s == 'Alick a01'; From 22d09caea2fadce928094aac6ce902dc7f1c6933 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 27 Feb 2023 08:31:40 +0000 Subject: [PATCH 52/83] SIZES_OF_ARRAYS_DOESN_MATCH --> SIZES_OF_ARRAYS_DONT_MATCH --- src/AggregateFunctions/AggregateFunctionArray.h | 4 ++-- src/AggregateFunctions/AggregateFunctionForEach.h | 4 ++-- src/Common/ErrorCodes.cpp | 2 +- src/DataTypes/NestedUtils.cpp | 4 ++-- src/Functions/FunctionHelpers.cpp | 4 ++-- src/Functions/array/FunctionArrayMapped.h | 4 ++-- src/Functions/array/arrayDistance.cpp | 6 +++--- src/Functions/array/arrayEnumerateExtended.h | 4 ++-- src/Functions/array/arrayEnumerateRanked.h | 8 ++++---- src/Functions/array/arrayReduce.cpp | 4 ++-- src/Functions/array/arrayReduceInRanges.cpp | 4 ++-- src/Functions/array/arrayUniq.cpp | 4 ++-- src/Functions/array/arrayZip.cpp | 4 ++-- src/Functions/nested.cpp | 4 ++-- src/Functions/tupleElement.cpp | 6 +++--- 
src/Functions/validateNestedArraySizes.cpp | 4 ++-- src/Interpreters/ArrayJoinAction.cpp | 4 ++-- 17 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index d1494f46f4b..21394e3ce05 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -13,7 +13,7 @@ struct Settings; namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -129,7 +129,7 @@ public: const IColumn::Offsets & ith_offsets = ith_column.getOffsets(); if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); } for (size_t i = begin; i < end; ++i) diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index e035d645222..f041dd11209 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -19,7 +19,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } @@ -197,7 +197,7 @@ public: const IColumn::Offsets & ith_offsets = ith_column.getOffsets(); if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); } AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 028663a2176..c00129249e2 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -197,7 +197,7 @@ M(187, COLLATION_COMPARISON_FAILED) \ M(188, UNKNOWN_ACTION) \ M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \ - M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \ + M(190, SIZES_OF_ARRAYS_DONT_MATCH) \ M(191, SET_SIZE_LIMIT_EXCEEDED) \ M(192, UNKNOWN_USER) \ M(193, WRONG_PASSWORD) \ diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index e5ba23b9df8..f029ac6ba27 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -25,7 +25,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace Nested @@ -242,7 +242,7 @@ void validateArraySizes(const Block & block) const ColumnArray & another_array_column = assert_cast(*elem.column); if (!first_array_column.hasEqualOffsets(another_array_column)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Elements '{}' and '{}' " "of Nested data structure '{}' (Array columns) have different array sizes.", block.getByPosition(it->second).name, elem.name, split.first); diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 
c981f666219..ff09274d907 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -213,7 +213,7 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments) if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to aggregate function must be equal."); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to aggregate function must be equal."); } return {nested_columns, offsets->data()}; } diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 989ec500cfb..61abc607349 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -37,7 +37,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -361,7 +361,7 @@ public: if (getOffsetsPtr(*column_array) != offsets_column && getOffsets(*column_array) != typeid_cast(*offsets_column).getData()) throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "{}s passed to {} must have equal size", argument_type_name, getName()); diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index c1137848cc5..c68c89ee0d5 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ARGUMENT_OUT_OF_BOUND; } @@ -356,7 +356,7 @@ private: { ColumnArray::Offset prev_offset = row > 0 ? 
offsets_x[row] : 0; throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arguments of function {} have different array sizes: {} and {}", getName(), offsets_x[row] - prev_offset, @@ -423,7 +423,7 @@ private: if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) { throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arguments of function {} have different array sizes: {} and {}", getName(), offsets_x[0], diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h index c3d69bb6972..3f145c05b54 100644 --- a/src/Functions/array/arrayEnumerateExtended.h +++ b/src/Functions/array/arrayEnumerateExtended.h @@ -20,7 +20,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } class FunctionArrayEnumerateUniq; @@ -153,7 +153,7 @@ ColumnPtr FunctionArrayEnumerateExtended::executeImpl(const ColumnsWith offsets_column = array->getOffsetsPtr(); } else if (offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); const auto * array_data = &array->getData(); diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 73feb3e46ea..8a348c07421 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -60,7 +60,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } class FunctionArrayEnumerateUniqRanked; @@ -194,7 +194,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[0] != array->getOffsets()) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); } } @@ -217,7 +217,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[col_depth] != array->getOffsets()) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); } } @@ -225,7 +225,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( if (col_depth < arrays_depths.depths[array_num]) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "{}: Passed array number {} depth ({}) is more than the actual array depth ({}).", getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth); } diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index d4896595941..a4b2cc037ab 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -19,7 +19,7 @@ namespace DB namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const 
int ILLEGAL_TYPE_OF_ARGUMENT; @@ -144,7 +144,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 07391c963a6..790bc3ef879 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -21,7 +21,7 @@ namespace DB namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -190,7 +190,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp index 1d1cf4e6392..81ba5b62094 100644 --- a/src/Functions/array/arrayUniq.cpp +++ b/src/Functions/array/arrayUniq.cpp @@ -18,7 +18,7 @@ namespace DB namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -151,7 +151,7 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument if (i == 0) offsets = &offsets_i; else if (offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); const auto * array_data = &array->getData(); diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 3a50491fd4b..44c323e3fe3 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -13,7 +13,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; } @@ -81,7 +81,7 @@ public: } else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument {} of function {} have different array sizes", i + 1, getName()); } diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index 7617951784f..b22330cd881 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern 
const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace @@ -118,7 +118,7 @@ public: const auto * rhs_array = assert_cast(arguments[i].column.get()); if (!lhs_array->hasEqualOffsets(*rhs_array)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument {} of function {} have different array offsets", i + 1, getName()); diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 879b6feed40..b1fd200f5cd 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace @@ -200,7 +200,7 @@ private: const auto & array_y = *assert_cast(col_y.get()); if (!array_x.hasEqualOffsets(array_y)) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument 3 of function {} have different array sizes", getName()); } } @@ -222,7 +222,7 @@ private: { if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument 3 of function {} have different array sizes", getName()); } prev_offset = offsets_y[row]; diff --git a/src/Functions/validateNestedArraySizes.cpp b/src/Functions/validateNestedArraySizes.cpp index 7e1dbc798d8..c422637ba7f 100644 --- a/src/Functions/validateNestedArraySizes.cpp +++ b/src/Functions/validateNestedArraySizes.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } /** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns's offsets when Update @@ -106,7 +106,7 @@ ColumnPtr FunctionValidateNestedArraySizes::executeImpl( else if (first_length != length) { throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Elements '{}' and '{}' of Nested data structure (Array columns) " "have different array sizes ({} and {} respectively) on row {}", arguments[1].name, arguments[args_idx].name, first_length, length, i); diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index 3650b888f9e..4f42122e98f 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -14,7 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int TYPE_MISMATCH; } @@ -186,7 +186,7 @@ void ArrayJoinAction::execute(Block & block) const ColumnArray & array = typeid_cast(*array_ptr); if (!is_unaligned && !array.hasEqualOffsets(*any_array)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match"); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match"); current.column = typeid_cast(*array_ptr).getDataPtr(); current.type = type->getNestedType(); From 4c9b9b362c16ddbbaeda2446284b861c30cef01e Mon Sep 17 00:00:00 2001 From: Robert Schulze 
Date: Mon, 27 Feb 2023 09:30:50 +0000
Subject: [PATCH 53/83] Replace dynamic_cast by typeid_cast

---
 src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 6 +++---
 src/Storages/MergeTree/checkDataPart.cpp                 | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
index fe5dbcefabf..b0101bb962c 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
@@ -222,7 +222,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
                 settings.query_write_settings));

         GinIndexStorePtr store = nullptr;
-        if (dynamic_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
+        if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
         {
             store = std::make_shared<GinIndexStore>(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment);
             gin_index_stores[stream_name] = store;
@@ -284,7 +284,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
         WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing;

         GinIndexStorePtr store;
-        if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
+        if (typeid_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
         {
             String stream_name = index_helper->getFileName();
             auto it = gin_index_stores.find(stream_name);
@@ -392,7 +392,7 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data
         /// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown
         /// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and
         /// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files.
-        if (dynamic_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
+        if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
         {
             String filename_without_extension = skip_indices[i]->getFileName();
             checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum();
diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp
index 3a33572d047..de31258b2f9 100644
--- a/src/Storages/MergeTree/checkDataPart.cpp
+++ b/src/Storages/MergeTree/checkDataPart.cpp
@@ -157,7 +157,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
     }

     NameSet projections_on_disk;
-    const auto & checksum_files_txt = checksums_txt.files;
+    const auto & checksums_txt_files = checksums_txt.files;
     for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
     {
         auto file_name = it->name();
@@ -178,8 +178,8 @@ IMergeTreeDataPart::Checksums checkDataPart(
         /// Skip files that we already calculated. Also skip metadata files that are not checksummed.
         if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name))
         {
-            auto txt_checksum_it = checksum_files_txt.find(file_name);
-            if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0)
+            auto txt_checksum_it = checksums_txt_files.find(file_name);
+            if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0)
             {
                 /// The file is not compressed.
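                /// (Assumption: a missing entry or a zero uncompressed_size in checksums.txt
                /// means the file is stored uncompressed, so it is hashed as a plain file.)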
                checksum_file(file_name);

From 10c04d5a38af9875ec2562ad955d613d0c198e18 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Mon, 27 Feb 2023 12:26:36 +0100
Subject: [PATCH 54/83] Fix clone() for ASTColumnMatchers

---
 src/Parsers/ASTColumnsMatcher.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp
index 940030577d6..ba398b995be 100644
--- a/src/Parsers/ASTColumnsMatcher.cpp
+++ b/src/Parsers/ASTColumnsMatcher.cpp
@@ -18,6 +18,7 @@ namespace ErrorCodes
 ASTPtr ASTColumnsRegexpMatcher::clone() const
 {
     auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
+    clone->children.clear();

     if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
     if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
@@ -91,6 +92,7 @@ bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
 ASTPtr ASTColumnsListMatcher::clone() const
 {
     auto clone = std::make_shared<ASTColumnsListMatcher>(*this);
+    clone->children.clear();

     if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
     if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
@@ -150,6 +152,7 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt
 ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const
 {
     auto clone = std::make_shared<ASTQualifiedColumnsRegexpMatcher>(*this);
+    clone->children.clear();

     if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }

@@ -216,6 +219,7 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting
 ASTPtr ASTQualifiedColumnsListMatcher::clone() const
 {
     auto clone = std::make_shared<ASTQualifiedColumnsListMatcher>(*this);
+    clone->children.clear();

     if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }


From cb460c6903a4f2c39f801708f16f4d3e810f181f Mon Sep 17 00:00:00 2001
From: "Mikhail f. 
Shiryaev" Date: Mon, 27 Feb 2023 13:11:17 +0100 Subject: [PATCH 55/83] Fix missing format_description --- tests/ci/docker_manifests_merge.py | 2 +- tests/ci/docker_server.py | 2 +- tests/ci/install_check.py | 2 +- tests/ci/run_check.py | 1 + tests/ci/sqlancer_check.py | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index e0917581089..0484ea8f641 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -218,7 +218,7 @@ def main(): else: description = "Nothing to update" - format_description(description) + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) post_commit_status(gh, pr_info.sha, NAME, description, status, url) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index fa4969a98d5..c6854c5aa78 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -369,7 +369,7 @@ def main(): description = f"Processed tags: {', '.join(tags)}" - format_description(description) + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) post_commit_status(gh, pr_info.sha, NAME, description, status, url) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index b0d0af380bd..54245670b26 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -345,7 +345,7 @@ def main(): ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, args.check_name, test_results) - format_description(description) + description = format_description(description) post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index e80987e8bc5..c6810173f7a 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -200,6 +200,7 @@ if __name__ == "__main__": pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True) can_run, description, labels_state = should_run_checks_for_pr(pr_info) + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index d71b9ec3e9c..1a6c4d14616 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -176,7 +176,7 @@ def main(): # status = "failure" description = "Task failed: $?=" + str(retcode) - format_description(description) + description = format_description(description) report_url = upload_results( s3_helper, From e680cd7b1264442eb3b174b005e45120274dde06 Mon Sep 17 00:00:00 2001 From: Peignon Melvyn Date: Mon, 27 Feb 2023 13:48:54 +0100 Subject: [PATCH 56/83] Update schema-inference.md Fixed the example --- docs/en/interfaces/schema-inference.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 728afa73a17..a07438a8748 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -1177,8 +1177,22 @@ This setting can be used to specify the types of columns that could not be deter **Example** ```sql -DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}' -SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)' +DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, 
"hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 + +DESCRIBE TABLE format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') +SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types = 1 + +Query id: 1752429f-1cd3-4d61-97af-8324ff97ee25 + +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ LowCardinality(UInt8) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ status │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + + ``` ```response ┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ From 7c5bd4a5f10c4ef483ea0eb6cc36443663c6dc9c Mon Sep 17 00:00:00 2001 From: Peignon Melvyn Date: Mon, 27 Feb 2023 13:53:44 +0100 Subject: [PATCH 57/83] Update schema-inference.md --- docs/en/interfaces/schema-inference.md | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index a07438a8748..2b50794c458 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -1178,21 +1178,6 @@ This setting can be used to specify the types of columns that could not be deter ```sql DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 - -DESCRIBE TABLE format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') -SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types = 1 - -Query id: 1752429f-1cd3-4d61-97af-8324ff97ee25 - -┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ id │ Nullable(Int64) │ │ │ │ │ │ -│ age │ LowCardinality(UInt8) │ │ │ │ │ │ -│ name │ Nullable(String) │ │ │ │ │ │ -│ status │ Nullable(String) │ │ │ │ │ │ -│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ -└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ - - ``` ```response ┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ From 77607ba0d4fec97171d51643cab226bcf754d5c6 Mon Sep 17 00:00:00 2001 From: bkuschel Date: Thu, 23 Feb 2023 09:42:06 -0500 Subject: [PATCH 58/83] Fix setenv string call --- programs/server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8c60f840b89..c9f5898fb72 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -696,7 +696,7 @@ try { const String config_path = config().getString("config-file", "config.xml"); const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf"); - setenv("OPENSSL_CONF", config_dir.string(), true); + setenv("OPENSSL_CONF", 
config_dir.c_str(), true); } #endif From 97fc091b1f860d0d5e5ab283fb05867eb0f8b86b Mon Sep 17 00:00:00 2001 From: bkuschel Date: Thu, 23 Feb 2023 09:45:32 -0500 Subject: [PATCH 59/83] Fix link order for gnu ld --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aef7dc6a38e..54a3f5651e5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -579,6 +579,7 @@ endif() if (TARGET ch_rust::skim) # Add only -I, library is needed only for clickhouse-client/clickhouse-local dbms_target_include_directories(PRIVATE $) + dbms_target_link_libraries(PUBLIC ch_rust::skim) endif() include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") From 891cb1e225d82a45bb33574ccf436ba50bb636d2 Mon Sep 17 00:00:00 2001 From: Boris Kuschel Date: Mon, 27 Feb 2023 08:00:53 -0500 Subject: [PATCH 60/83] Update src/CMakeLists.txt Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 54a3f5651e5..6c5142813c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -577,7 +577,6 @@ if (TARGET ch_contrib::annoy) endif() if (TARGET ch_rust::skim) - # Add only -I, library is needed only for clickhouse-client/clickhouse-local dbms_target_include_directories(PRIVATE $) dbms_target_link_libraries(PUBLIC ch_rust::skim) endif() From 8fe08bc458a4aa1655230dc8dee47bee3f810844 Mon Sep 17 00:00:00 2001 From: Peignon Melvyn Date: Mon, 27 Feb 2023 14:02:10 +0100 Subject: [PATCH 61/83] Add a warning to the schema_inference_hints --- docs/en/operations/settings/settings-formats.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index f8c95d8b890..026e695fc1e 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -142,6 +142,10 @@ y Nullable(String) z IPv4 ``` +:::warning +If the `schema_inference_hints` is not formated properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored. +::: + ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} Controls making inferred types `Nullable` in schema inference for formats without information about nullability. From 5b6e581c1a86933bf640ceb075f449282500cb3b Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 27 Feb 2023 14:18:41 +0100 Subject: [PATCH 62/83] Preset description on the tweak reset --- tests/ci/release.py | 2 ++ tests/ci/version_helper.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tests/ci/release.py b/tests/ci/release.py index f7b5155e750..e221bd3d008 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -273,7 +273,9 @@ class Release: self, version: ClickHouseVersion, reset_tweak: bool = True ) -> None: if reset_tweak: + desc = version.description version = version.reset_tweak() + version.with_description(desc) update_cmake_version(version) update_contributors(raise_error=True) if self.dry_run: diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 1ede2c90b55..372974f4eda 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -58,6 +58,7 @@ class ClickHouseVersion: elif self._git is not None: self._tweak = self._git.tweak self._describe = "" + self._description = "" def update(self, part: Literal["major", "minor", "patch"]) -> "ClickHouseVersion": """If part is valid, returns a new version""" @@ -125,6 +126,10 @@ class ClickHouseVersion: def describe(self): return self._describe + @property + def description(self) -> str: + return self._description + @property def string(self): return ".".join( @@ -149,6 +154,7 @@ class ClickHouseVersion: def with_description(self, version_type): if version_type not in VersionType.VALID: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") + self._description = version_type self._describe = f"v{self.string}-{version_type}" def __eq__(self, other: Any) -> bool: From 014a9c796167831a52eb7cce38de4fd503233ded Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 27 Feb 2023 13:25:28 +0000 Subject: [PATCH 63/83] Fix lib file --- tests/ci/stress_tests.lib | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib index 97269dc7d75..2cbae112b7f 100644 --- a/tests/ci/stress_tests.lib +++ b/tests/ci/stress_tests.lib @@ -16,14 +16,17 @@ function escaped() clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" } + function head_escaped() { head -n $FAILURE_CONTEXT_LINES $1 | escaped } + function unts() { grep -Po "[0-9][0-9]:[0-9][0-9] \K.*" } + function trim_server_logs() { head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped @@ -143,7 +146,8 @@ function stop() clickhouse stop --max-tries "$max_tries" --do-not-kill && return - if [ $check_hang == true ] then + if [ $check_hang == true ] + then # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. 
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv kill -TERM "$(pidof gdb)" ||: From 463dbdc838e019cb505dedf9c4446d922867ce9b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 27 Feb 2023 13:29:21 +0000 Subject: [PATCH 64/83] Update version_date.tsv and changelogs after v22.3.19.6-lts --- docs/changelogs/v22.3.19.6-lts.md | 17 +++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 18 insertions(+) create mode 100644 docs/changelogs/v22.3.19.6-lts.md diff --git a/docs/changelogs/v22.3.19.6-lts.md b/docs/changelogs/v22.3.19.6-lts.md new file mode 100644 index 00000000000..d5b45f4ce66 --- /dev/null +++ b/docs/changelogs/v22.3.19.6-lts.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index d4abbdebee1..3814e94bf24 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -63,6 +63,7 @@ v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 v22.4.2.1-stable 2022-04-22 +v22.3.19.6-lts 2023-02-27 v22.3.18.37-lts 2023-02-15 v22.3.17.13-lts 2023-01-12 v22.3.16.1190-lts 2023-01-09 From bc4418bfb6012146d998d5086c5336395eee98aa Mon Sep 17 00:00:00 2001 From: Derek Chia Date: Mon, 27 Feb 2023 23:14:22 +0800 Subject: [PATCH 65/83] Update schema-inference.md --- docs/en/interfaces/schema-inference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 2b50794c458..25bdb0c36a3 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -229,7 +229,7 @@ To prevent inferring the same schema every time ClickHouse read the data from th There are special settings that control this cache: - `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config. -- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries. +- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries. The schema of the file can be changed by modifying the data or by changing format settings. For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file. 
From 12a9ff36b230c0de05b1abfd1b70340c94db8e7c Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 27 Feb 2023 15:20:56 +0000 Subject: [PATCH 66/83] Better exception messages when schema_inference_hints is ill-formatted --- .../parseColumnsListForTableFunction.cpp | 7 ++- .../parseColumnsListForTableFunction.h | 2 +- src/Processors/Formats/ISchemaReader.cpp | 60 ++++++++++++++----- src/Processors/Formats/ISchemaReader.h | 39 ++++++++---- .../Impl/JSONColumnsBlockInputFormatBase.cpp | 6 +- .../Impl/JSONColumnsBlockInputFormatBase.h | 1 + src/TableFunctions/TableFunctionValues.cpp | 3 +- 7 files changed, 86 insertions(+), 32 deletions(-) diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 9e6326b431a..e7302b6324a 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -92,12 +92,11 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con return columns; } -bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context) +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error) { ParserColumnDeclarationList parser(true, true); const Settings & settings = context->getSettingsRef(); - String error; const char * start = structure.data(); const char * end = structure.data() + structure.size(); ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); @@ -106,7 +105,10 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) + { + error = fmt::format("Invalid columns declaration list: \"{}\"", structure); return false; + } try { @@ -118,6 +120,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip } catch (...) 
{ + error = getCurrentExceptionMessage(false); return false; } } diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index 97923bcad77..212c378d3b5 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -33,6 +33,6 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings /// Parses a common argument for table functions such as table structure given in string ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context); -bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context); +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error); } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 48cb093f0ab..c96cb373a2d 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -15,20 +16,38 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read) +void checkFinalInferredType( + DataTypePtr & type, + const String & name, + const FormatSettings & settings, + const DataTypePtr & default_type, + size_t rows_read, + const String & hints_parsing_error) { if (!checkIfTypeIsComplete(type)) { if (!default_type) - throw Exception( - ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, - "Cannot determine type for column '{}' by first {} rows " - "of data, most likely this column contains only Nulls or empty " - "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " - "If your data contains complex JSON objects, try enabling one " - "of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", - name, - rows_read); + { + if (hints_parsing_error.empty()) + throw Exception( + ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, + "Cannot determine type for column '{}' by first {} rows " + "of data, most likely this column contains only Nulls or empty " + "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " + "If your data contains complex JSON objects, try enabling one " + "of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", + name, + rows_read); + else + throw Exception( + ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, + "Cannot determine type for column '{}' by first {} rows " + "of data, most likely this column contains only Nulls or empty Arrays/Maps. 
" + "Column types from setting schema_inference_hints couldn't be parsed because of error: {}", + name, + rows_read, + hints_parsing_error); + } type = default_type; } @@ -46,11 +65,15 @@ IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & fo void IIRowSchemaReader::setContext(ContextPtr & context) { ColumnsDescription columns; - if (tryParseColumnsListFromString(hints_str, columns, context)) + if (tryParseColumnsListFromString(hints_str, columns, context, hints_parsing_error)) { for (const auto & [name, type] : columns.getAll()) hints[name] = type; } + else + { + LOG_WARNING(&Poco::Logger::get("IIRowSchemaReader"), "Couldn't parse schema inference hints: {}. This setting will be ignored", hints_parsing_error); + } } void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) @@ -137,7 +160,14 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (!new_data_types[field_index] || hints.contains(column_names[field_index])) continue; - chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read); + chooseResultColumnType( + *this, + data_types[field_index], + new_data_types[field_index], + getDefaultType(field_index), + std::to_string(field_index + 1), + rows_read, + hints_parsing_error); } } @@ -149,7 +179,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() { transformFinalTypeIfNeeded(data_types[field_index]); /// Check that we could determine the type of this column. - checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read); + checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read, hints_parsing_error); } result.emplace_back(column_names[field_index], data_types[field_index]); } @@ -246,7 +276,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() continue; auto & type = it->second; - chooseResultColumnType(*this, type, new_type, default_type, name, rows_read); + chooseResultColumnType(*this, type, new_type, default_type, name, rows_read, hints_parsing_error); } } @@ -263,7 +293,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { transformFinalTypeIfNeeded(type); /// Check that we could determine the type of this column. - checkFinalInferredType(type, name, format_settings, default_type, rows_read); + checkFinalInferredType(type, name, format_settings, default_type, rows_read, hints_parsing_error); } result.emplace_back(name, type); } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index edc5c6068c3..81bc94afa6c 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -65,6 +65,7 @@ protected: String hints_str; FormatSettings format_settings; std::unordered_map hints; + String hints_parsing_error; }; /// Base class for schema inference for formats that read data row by row. @@ -145,7 +146,8 @@ void chooseResultColumnType( DataTypePtr & new_type, const DataTypePtr & default_type, const String & column_name, - size_t row) + size_t row, + const String & hints_parsing_error = "") { if (!type) { @@ -166,14 +168,25 @@ void chooseResultColumnType( type = default_type; else { - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. 
" - "You can specify the type for this column using setting schema_inference_hints", - type->getName(), - column_name, - row, - new_type->getName()); + if (hints_parsing_error.empty()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " + "You can specify the type for this column using setting schema_inference_hints", + type->getName(), + column_name, + row, + new_type->getName()); + else + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " + "Column types from setting schema_inference_hints couldn't be parsed because of error: {}", + type->getName(), + column_name, + row, + new_type->getName(), + hints_parsing_error); } } @@ -196,7 +209,13 @@ void chooseResultColumnTypes( chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row); } -void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read); +void checkFinalInferredType( + DataTypePtr & type, + const String & name, + const FormatSettings & settings, + const DataTypePtr & default_type, + size_t rows_read, + const String & hints_parsing_error); Strings splitColumnNames(const String & column_names_str); diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 204a5077e31..a39722950e4 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -182,7 +182,7 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx) { ColumnsDescription columns; - if (tryParseColumnsListFromString(hints_str, columns, ctx)) + if (tryParseColumnsListFromString(hints_str, columns, ctx, hints_parsing_error)) { for (const auto & [name, type] : columns.getAll()) hints[name] = type; @@ -238,7 +238,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() rows_in_block = 0; auto column_type = readColumnAndGetDataType( column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); - chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1); + chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1, hints_parsing_error); } ++iteration; @@ -260,7 +260,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() { transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); /// Check that we could determine the type of this column. 
- checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference); + checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference, hints_parsing_error); } result.emplace_back(name, type); } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index 3292b5649c9..2babc0734f9 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -91,6 +91,7 @@ private: const FormatSettings format_settings; String hints_str; std::unordered_map hints; + String hints_parsing_error; std::unique_ptr reader; Names column_names_from_settings; JSONInferenceInfo inference_info; diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 545427f30c9..cf0e20c624c 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -92,7 +92,8 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr const auto & literal = args[0]->as(); String value; - if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context)) + String error; + if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context, error)) { has_structure_in_arguments = true; return; From 261c3aa86dd33914e1b6a4a7ebb4db41f47e9dca Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 27 Feb 2023 16:42:29 +0100 Subject: [PATCH 67/83] Update --- docker/test/upgrade/run.sh | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 733d3a79d9f..b93959ee039 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -121,8 +121,6 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c # FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server. # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") -# NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected -# ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility echo "Check for Error messages in server log:" rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ @@ -152,8 +150,6 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "} TCPHandler: Code:" \ -e "} executeQuery: Code:" \ -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "This engine is deprecated and is not supported in transactions" \ - -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ -e "Code: 269. 
DB::Exception: Destination table is myself" \ @@ -201,8 +197,4 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv -# Core dumps -find . -type f -maxdepth 1 -name 'core.*' | while read core; do - zstd --threads=0 $core - mv $core.zst /test_output/ -done +collect_core_dumps From 29da7fc9653c69f6f5aac7cac1a42729845a9a91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Feb 2023 17:48:38 +0100 Subject: [PATCH 68/83] Decrease log level in "disks" --- src/Storages/MergeTree/DataPartsExchange.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index b671106f46a..7a38033a126 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -493,12 +493,12 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (!disk) { - LOG_TRACE(log, "Disk for fetch is not provided, reserving space using storage balanced reservation"); + LOG_TEST(log, "Disk for fetch is not provided, reserving space using storage balanced reservation"); reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); if (!reservation) { - LOG_TRACE(log, "Disk for fetch is not provided, reserving space using TTL rules"); + LOG_TEST(log, "Disk for fetch is not provided, reserving space using TTL rules"); reservation = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); } @@ -506,18 +506,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( } else if (!disk) { - LOG_TRACE(log, "Making balanced reservation"); + LOG_TEST(log, "Making balanced reservation"); reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); if (!reservation) { - LOG_TRACE(log, "Making simple reservation"); + LOG_TEST(log, "Making simple reservation"); reservation = data.reserveSpace(sum_files_size); } } } else if (!disk) { - LOG_TRACE(log, "Making reservation on the largest disk"); + LOG_TEST(log, "Making reservation on the largest disk"); /// We don't know real size of part because sender server version is too old reservation = data.makeEmptyReservationOnLargestDisk(); } @@ -525,11 +525,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (!disk) { disk = reservation->getDisk(); - LOG_INFO(log, "Disk for fetch is not provided, getting disk from reservation {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); } else { - LOG_INFO(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); } UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); From 135961acb7922f7e476335da9f8b772c8ca61134 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 27 Feb 2023 17:59:19 +0100 Subject: [PATCH 69/83] Change the cherry-pick PR body --- tests/ci/cherry_pick.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 7987ea2643f..585cfd52e87 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -52,7 +52,7 @@ class Labels: class ReleaseBranch: - CHERRYPICK_DESCRIPTION = """This pull-request is a first step of an automated \ + CHERRYPICK_DESCRIPTION = f"""This pull-request is a first step of an automated \ backporting. It contains changes like after calling a local command `git cherry-pick`. If you intend to continue backporting this changes, then resolve all conflicts if any. @@ -60,13 +60,16 @@ Otherwise, if you do not want to backport them, then just close this pull-reques The check results does not matter at this step - you can safely ignore them. Also this pull-request will be merged automatically as it reaches the mergeable state, \ - but you always can merge it manually. +**do not merge it manually**. + +If it stuck, check the original PR for `{Labels.BACKPORTS_CREATED}` and delete it if \ +necessary. """ BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \ backporting. Treat it as a standard pull-request: look at the checks and resolve conflicts. Merge it only if you intend to backport changes to the target branch, otherwise just \ - close it. +close it. """ REMOTE = "" From 36e65f5f84ef01fe4526f83aeb43dd806752426d Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 27 Feb 2023 19:00:40 +0100 Subject: [PATCH 70/83] Use versions vith dots --- docs/en/interfaces/formats.md | 2 +- docs/en/operations/settings/settings-formats.md | 4 ++-- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 2 +- src/Core/SettingsEnums.cpp | 8 ++++---- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 27cab588e15..b2b2c6d5b1e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1972,7 +1972,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. -- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `v2_latest`. +- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. 
## Arrow {#data-format-arrow} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6cc4ba5f2c9..c15b72ce97c 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1104,9 +1104,9 @@ Enabled by default. ### output_format_parquet_version {#output_format_parquet_version} -The version of Parquet format used in output format. Supported versions: `v1_0`, `v2_4`, `v2_6` and `v2_latest`. +The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`. -Default value: `v2_latest`. +Default value: `2.latest`. ## Hive format settings {#hive-format-settings} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 82f0356349f..3908254b6f1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -856,7 +856,7 @@ class IColumn; M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \ - M(ParquetVersion, output_format_parquet_version, "v2_latest", "Parquet format version for output format. Supported versions: v1_0, v2_4, v2_6 and v2_latest (default)", 0) \ + M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 80d4958944e..04f328bb665 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,7 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"23.3", {{"output_format_parquet_version", "v1_0", "v2_latest", "Use latest Parquet format version for output format"}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 751fbf65281..9e1ab585bb0 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -174,9 +174,9 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, - {{"v1_0", FormatSettings::ParquetVersion::V1_0}, - {"v2_4", FormatSettings::ParquetVersion::V2_4}, - {"v2_6", FormatSettings::ParquetVersion::V2_6}, - {"v2_latest", 
FormatSettings::ParquetVersion::V2_LATEST}}) + {{"1.0", FormatSettings::ParquetVersion::V1_0}, + {"2.4", FormatSettings::ParquetVersion::V2_4}, + {"2.6", FormatSettings::ParquetVersion::V2_6}, + {"2.latest", FormatSettings::ParquetVersion::V2_LATEST}}) } From af3b8d5cbdcf7d0435184fb6d1acf69ef0efa437 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri <30620077+chhetripradeep@users.noreply.github.com> Date: Tue, 28 Feb 2023 02:15:15 +0800 Subject: [PATCH 71/83] Revert "Add join_algorithm='grace_hash' to stress tests" --- docker/test/stress/stress | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 4ed8ec838f2..c62bb615183 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -30,15 +30,13 @@ def get_options(i, backward_compatibility_check): if i % 2 == 1: join_alg_num = i // 2 - if join_alg_num % 5 == 0: + if join_alg_num % 4 == 0: client_options.append("join_algorithm='parallel_hash'") - if join_alg_num % 5 == 1: + if join_alg_num % 4 == 1: client_options.append("join_algorithm='partial_merge'") - if join_alg_num % 5 == 2: + if join_alg_num % 4 == 2: client_options.append("join_algorithm='full_sorting_merge'") - if join_alg_num % 5 == 3: - client_options.append("join_algorithm='grace_hash'") - if join_alg_num % 5 == 4: + if join_alg_num % 4 == 3: client_options.append("join_algorithm='auto'") client_options.append('max_rows_in_join=1000') From 76d0fb3cacc6e1a5810b488d265d7fda9c54d8e0 Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Mon, 27 Feb 2023 14:43:16 -0500 Subject: [PATCH 72/83] =?UTF-8?q?=F0=9F=90=B3=20Update=20Ubuntu=20Image=20?= =?UTF-8?q?(#46784)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * :whale: Update Ubuntu Image Signed-off-by: Julio Jimenez * Ready for testing Signed-off-by: Julio Jimenez * add back wget Signed-off-by: Julio Jimenez * add back wget Signed-off-by: Julio Jimenez * add back wget Signed-off-by: Julio Jimenez --------- Signed-off-by: Julio Jimenez --- docker/server/Dockerfile.ubuntu | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 961c528f19c..5dbb244c298 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 # see https://github.com/moby/moby/issues/4032#issuecomment-192327844 ARG DEBIAN_FRONTEND=noninteractive @@ -9,13 +9,14 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list && groupadd -r clickhouse --gid=101 \ && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ && apt-get update \ + && apt-get upgrade -yq \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ ca-certificates \ dirmngr \ - gnupg \ - locales \ + gnupg2 \ wget \ + locales \ tzdata \ && apt-get clean @@ -80,15 +81,8 @@ RUN arch=${TARGETARCH:-amd64} \ && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client -# Remove as much of Ubuntu as possible. -# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution. -# ClickHouse does not need Docker at all. ClickHouse is above all that. -# It does not care about Ubuntu, Docker, or other cruft and you should neither. 
-# The fact that this Docker image is based on Ubuntu is just a misconception. -# Some vulnerability scanners are arguing about Ubuntu, which is not relevant to ClickHouse at all. -# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners. - -RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y +RUN apt-get autoremove --purge -yq libksba8 && \ + apt-get autoremove -yq # we need to allow "others" access to clickhouse folder, because docker container # can be started with arbitrary uid (openshift usecase) From e5f4ba75437118983cc490c000983cc561ced6ba Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 27 Feb 2023 19:44:37 +0000 Subject: [PATCH 73/83] Rename recent tests to fix order --- ...{02661_window_ntile.reference => 02560_window_ntile.reference} | 0 .../{02661_window_ntile.sql => 02560_window_ntile.sql} | 0 ...rence => 02561_sorting_constants_and_distinct_crash.reference} | 0 ...t_crash.sql => 02561_sorting_constants_and_distinct_crash.sql} | 0 ...65_regexp_extract.reference => 02562_regexp_extract.reference} | 0 .../{02665_regexp_extract.sql => 02562_regexp_extract.sql} | 0 ...erence => 02563_progress_when_no_rows_from_prewhere.reference} | 0 ...m_prewhere.sh => 02563_progress_when_no_rows_from_prewhere.sh} | 0 ...al_desc.reference => 02564_read_in_order_final_desc.reference} | 0 ...in_order_final_desc.sql => 02564_read_in_order_final_desc.sql} | 0 ...settings.reference => 02565_analyzer_limit_settings.reference} | 0 ...lyzer_limit_settings.sql => 02565_analyzer_limit_settings.sql} | 0 ...erence => 02566_analyzer_limit_settings_distributed.reference} | 0 ...tributed.sql => 02566_analyzer_limit_settings_distributed.sql} | 0 ..._and_consistency.reference => 02567_and_consistency.reference} | 0 .../{02667_and_consistency.sql => 02567_and_consistency.sql} | 0 ....reference => 02568_array_map_const_low_cardinality.reference} | 0 ..._cardinality.sql => 02568_array_map_const_low_cardinality.sql} | 0 ...n_array_length.reference => 02568_json_array_length.reference} | 0 .../{02667_json_array_length.sql => 02568_json_array_length.sql} | 0 ...sult.reference => 02569_order_by_aggregation_result.reference} | 0 ...gregation_result.sql => 02569_order_by_aggregation_result.sql} | 0 ...nsert.reference => 02570_fallback_from_async_insert.reference} | 0 ...k_from_async_insert.sh => 02570_fallback_from_async_insert.sh} | 0 ...reference => 02571_local_desc_abort_on_twitter_json.reference} | 0 ..._twitter_json.sh => 02571_local_desc_abort_on_twitter_json.sh} | 0 ..._intersections.reference => 02572_max_intersections.reference} | 0 .../{02670_max_intersections.sql => 02572_max_intersections.sql} | 0 ...ile_fuse_msan.reference => 02573_quantile_fuse_msan.reference} | 0 ...{02671_quantile_fuse_msan.sql => 02573_quantile_fuse_msan.sql} | 0 ....reference => 02574_suspicious_low_cardinality_msan.reference} | 0 ...inality_msan.sql => 02574_suspicious_low_cardinality_msan.sql} | 0 ...ap_hashing_msan.reference => 02575_map_hashing_msan.reference} | 0 .../{02673_map_hashing_msan.sql => 02575_map_hashing_msan.sql} | 0 ....reference => 02576_predicate_push_down_sorting_fix.reference} | 0 ..._sorting_fix.sql => 02576_predicate_push_down_sorting_fix.sql} | 0 ..._has.reference => 02576_rewrite_array_exists_to_has.reference} | 0 ...ray_exists_to_has.sql => 02576_rewrite_array_exists_to_has.sh} | 0 ...e.reference => 02577_analyzer_array_join_calc_twice.reference} | 0 ...in_calc_twice.sql => 02577_analyzer_array_join_calc_twice.sql} | 0 ...e_update.reference => 
02577_keepermap_delete_update.reference} | 0 ...permap_delete_update.sql => 02577_keepermap_delete_update.sql} | 0 ...38_ipv4_codec_t64.reference => 02578_ipv4_codec_t64.reference} | 0 .../{25338_ipv4_codec_t64.sql => 02578_ipv4_codec_t64.sql} | 0 ...ies.reference => 02578_parameterized_rename_queries.reference} | 0 ..._rename_queries.sql => 02578_parameterized_rename_queries.sql} | 0 ...ill_empty_chunk.reference => 02579_fill_empty_chunk.reference} | 0 .../{25339_fill_empty_chunk.sql => 02579_fill_empty_chunk.sql} | 0 ...ed_replace.reference => 02579_parameterized_replace.reference} | 0 ..._parameterized_replace.sql => 02579_parameterized_replace.sql} | 0 ...ch_bug.reference => 02580_like_substring_search_bug.reference} | 0 ...bstring_search_bug.sql => 02580_like_substring_search_bug.sql} | 0 52 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02661_window_ntile.reference => 02560_window_ntile.reference} (100%) rename tests/queries/0_stateless/{02661_window_ntile.sql => 02560_window_ntile.sql} (100%) rename tests/queries/0_stateless/{02662_sorting_constants_and_distinct_crash.reference => 02561_sorting_constants_and_distinct_crash.reference} (100%) rename tests/queries/0_stateless/{02662_sorting_constants_and_distinct_crash.sql => 02561_sorting_constants_and_distinct_crash.sql} (100%) rename tests/queries/0_stateless/{02665_regexp_extract.reference => 02562_regexp_extract.reference} (100%) rename tests/queries/0_stateless/{02665_regexp_extract.sql => 02562_regexp_extract.sql} (100%) rename tests/queries/0_stateless/{02666_progress_when_no_rows_from_prewhere.reference => 02563_progress_when_no_rows_from_prewhere.reference} (100%) rename tests/queries/0_stateless/{02666_progress_when_no_rows_from_prewhere.sh => 02563_progress_when_no_rows_from_prewhere.sh} (100%) rename tests/queries/0_stateless/{02666_read_in_order_final_desc.reference => 02564_read_in_order_final_desc.reference} (100%) rename tests/queries/0_stateless/{02666_read_in_order_final_desc.sql => 02564_read_in_order_final_desc.sql} (100%) rename tests/queries/0_stateless/{02667_analyzer_limit_settings.reference => 02565_analyzer_limit_settings.reference} (100%) rename tests/queries/0_stateless/{02667_analyzer_limit_settings.sql => 02565_analyzer_limit_settings.sql} (100%) rename tests/queries/0_stateless/{02667_analyzer_limit_settings_distributed.reference => 02566_analyzer_limit_settings_distributed.reference} (100%) rename tests/queries/0_stateless/{02667_analyzer_limit_settings_distributed.sql => 02566_analyzer_limit_settings_distributed.sql} (100%) rename tests/queries/0_stateless/{02667_and_consistency.reference => 02567_and_consistency.reference} (100%) rename tests/queries/0_stateless/{02667_and_consistency.sql => 02567_and_consistency.sql} (100%) rename tests/queries/0_stateless/{02667_array_map_const_low_cardinality.reference => 02568_array_map_const_low_cardinality.reference} (100%) rename tests/queries/0_stateless/{02667_array_map_const_low_cardinality.sql => 02568_array_map_const_low_cardinality.sql} (100%) rename tests/queries/0_stateless/{02667_json_array_length.reference => 02568_json_array_length.reference} (100%) rename tests/queries/0_stateless/{02667_json_array_length.sql => 02568_json_array_length.sql} (100%) rename tests/queries/0_stateless/{02667_order_by_aggregation_result.reference => 02569_order_by_aggregation_result.reference} (100%) rename tests/queries/0_stateless/{02667_order_by_aggregation_result.sql => 02569_order_by_aggregation_result.sql} (100%) rename 
tests/queries/0_stateless/{02668_fallback_from_async_insert.reference => 02570_fallback_from_async_insert.reference} (100%) rename tests/queries/0_stateless/{02668_fallback_from_async_insert.sh => 02570_fallback_from_async_insert.sh} (100%) rename tests/queries/0_stateless/{02669_local_desc_abort_on_twitter_json.reference => 02571_local_desc_abort_on_twitter_json.reference} (100%) rename tests/queries/0_stateless/{02669_local_desc_abort_on_twitter_json.sh => 02571_local_desc_abort_on_twitter_json.sh} (100%) rename tests/queries/0_stateless/{02670_max_intersections.reference => 02572_max_intersections.reference} (100%) rename tests/queries/0_stateless/{02670_max_intersections.sql => 02572_max_intersections.sql} (100%) rename tests/queries/0_stateless/{02671_quantile_fuse_msan.reference => 02573_quantile_fuse_msan.reference} (100%) rename tests/queries/0_stateless/{02671_quantile_fuse_msan.sql => 02573_quantile_fuse_msan.sql} (100%) rename tests/queries/0_stateless/{02672_suspicious_low_cardinality_msan.reference => 02574_suspicious_low_cardinality_msan.reference} (100%) rename tests/queries/0_stateless/{02672_suspicious_low_cardinality_msan.sql => 02574_suspicious_low_cardinality_msan.sql} (100%) rename tests/queries/0_stateless/{02673_map_hashing_msan.reference => 02575_map_hashing_msan.reference} (100%) rename tests/queries/0_stateless/{02673_map_hashing_msan.sql => 02575_map_hashing_msan.sql} (100%) rename tests/queries/0_stateless/{25337_predicate_push_down_sorting_fix.reference => 02576_predicate_push_down_sorting_fix.reference} (100%) rename tests/queries/0_stateless/{25337_predicate_push_down_sorting_fix.sql => 02576_predicate_push_down_sorting_fix.sql} (100%) rename tests/queries/0_stateless/{02660_rewrite_array_exists_to_has.reference => 02576_rewrite_array_exists_to_has.reference} (100%) rename tests/queries/0_stateless/{02660_rewrite_array_exists_to_has.sql => 02576_rewrite_array_exists_to_has.sh} (100%) rename tests/queries/0_stateless/{25338_analyzer_array_join_calc_twice.reference => 02577_analyzer_array_join_calc_twice.reference} (100%) rename tests/queries/0_stateless/{25338_analyzer_array_join_calc_twice.sql => 02577_analyzer_array_join_calc_twice.sql} (100%) rename tests/queries/0_stateless/{02661_keepermap_delete_update.reference => 02577_keepermap_delete_update.reference} (100%) rename tests/queries/0_stateless/{02661_keepermap_delete_update.sql => 02577_keepermap_delete_update.sql} (100%) rename tests/queries/0_stateless/{25338_ipv4_codec_t64.reference => 02578_ipv4_codec_t64.reference} (100%) rename tests/queries/0_stateless/{25338_ipv4_codec_t64.sql => 02578_ipv4_codec_t64.sql} (100%) rename tests/queries/0_stateless/{02661_parameterized_rename_queries.reference => 02578_parameterized_rename_queries.reference} (100%) rename tests/queries/0_stateless/{02661_parameterized_rename_queries.sql => 02578_parameterized_rename_queries.sql} (100%) rename tests/queries/0_stateless/{25339_fill_empty_chunk.reference => 02579_fill_empty_chunk.reference} (100%) rename tests/queries/0_stateless/{25339_fill_empty_chunk.sql => 02579_fill_empty_chunk.sql} (100%) rename tests/queries/0_stateless/{02661_parameterized_replace.reference => 02579_parameterized_replace.reference} (100%) rename tests/queries/0_stateless/{02661_parameterized_replace.sql => 02579_parameterized_replace.sql} (100%) rename tests/queries/0_stateless/{25339_like_substring_search_bug.reference => 02580_like_substring_search_bug.reference} (100%) rename tests/queries/0_stateless/{25339_like_substring_search_bug.sql => 
02580_like_substring_search_bug.sql} (100%) diff --git a/tests/queries/0_stateless/02661_window_ntile.reference b/tests/queries/0_stateless/02560_window_ntile.reference similarity index 100% rename from tests/queries/0_stateless/02661_window_ntile.reference rename to tests/queries/0_stateless/02560_window_ntile.reference diff --git a/tests/queries/0_stateless/02661_window_ntile.sql b/tests/queries/0_stateless/02560_window_ntile.sql similarity index 100% rename from tests/queries/0_stateless/02661_window_ntile.sql rename to tests/queries/0_stateless/02560_window_ntile.sql diff --git a/tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.reference b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.reference similarity index 100% rename from tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.reference rename to tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.reference diff --git a/tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.sql b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql similarity index 100% rename from tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.sql rename to tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql diff --git a/tests/queries/0_stateless/02665_regexp_extract.reference b/tests/queries/0_stateless/02562_regexp_extract.reference similarity index 100% rename from tests/queries/0_stateless/02665_regexp_extract.reference rename to tests/queries/0_stateless/02562_regexp_extract.reference diff --git a/tests/queries/0_stateless/02665_regexp_extract.sql b/tests/queries/0_stateless/02562_regexp_extract.sql similarity index 100% rename from tests/queries/0_stateless/02665_regexp_extract.sql rename to tests/queries/0_stateless/02562_regexp_extract.sql diff --git a/tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.reference b/tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.reference similarity index 100% rename from tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.reference rename to tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.reference diff --git a/tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.sh b/tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.sh similarity index 100% rename from tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.sh rename to tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.sh diff --git a/tests/queries/0_stateless/02666_read_in_order_final_desc.reference b/tests/queries/0_stateless/02564_read_in_order_final_desc.reference similarity index 100% rename from tests/queries/0_stateless/02666_read_in_order_final_desc.reference rename to tests/queries/0_stateless/02564_read_in_order_final_desc.reference diff --git a/tests/queries/0_stateless/02666_read_in_order_final_desc.sql b/tests/queries/0_stateless/02564_read_in_order_final_desc.sql similarity index 100% rename from tests/queries/0_stateless/02666_read_in_order_final_desc.sql rename to tests/queries/0_stateless/02564_read_in_order_final_desc.sql diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings.reference b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference similarity index 100% rename from tests/queries/0_stateless/02667_analyzer_limit_settings.reference rename to tests/queries/0_stateless/02565_analyzer_limit_settings.reference diff --git 
a/tests/queries/0_stateless/02667_analyzer_limit_settings.sql b/tests/queries/0_stateless/02565_analyzer_limit_settings.sql similarity index 100% rename from tests/queries/0_stateless/02667_analyzer_limit_settings.sql rename to tests/queries/0_stateless/02565_analyzer_limit_settings.sql diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.reference b/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.reference similarity index 100% rename from tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.reference rename to tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.reference diff --git a/tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.sql b/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.sql similarity index 100% rename from tests/queries/0_stateless/02667_analyzer_limit_settings_distributed.sql rename to tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.sql diff --git a/tests/queries/0_stateless/02667_and_consistency.reference b/tests/queries/0_stateless/02567_and_consistency.reference similarity index 100% rename from tests/queries/0_stateless/02667_and_consistency.reference rename to tests/queries/0_stateless/02567_and_consistency.reference diff --git a/tests/queries/0_stateless/02667_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql similarity index 100% rename from tests/queries/0_stateless/02667_and_consistency.sql rename to tests/queries/0_stateless/02567_and_consistency.sql diff --git a/tests/queries/0_stateless/02667_array_map_const_low_cardinality.reference b/tests/queries/0_stateless/02568_array_map_const_low_cardinality.reference similarity index 100% rename from tests/queries/0_stateless/02667_array_map_const_low_cardinality.reference rename to tests/queries/0_stateless/02568_array_map_const_low_cardinality.reference diff --git a/tests/queries/0_stateless/02667_array_map_const_low_cardinality.sql b/tests/queries/0_stateless/02568_array_map_const_low_cardinality.sql similarity index 100% rename from tests/queries/0_stateless/02667_array_map_const_low_cardinality.sql rename to tests/queries/0_stateless/02568_array_map_const_low_cardinality.sql diff --git a/tests/queries/0_stateless/02667_json_array_length.reference b/tests/queries/0_stateless/02568_json_array_length.reference similarity index 100% rename from tests/queries/0_stateless/02667_json_array_length.reference rename to tests/queries/0_stateless/02568_json_array_length.reference diff --git a/tests/queries/0_stateless/02667_json_array_length.sql b/tests/queries/0_stateless/02568_json_array_length.sql similarity index 100% rename from tests/queries/0_stateless/02667_json_array_length.sql rename to tests/queries/0_stateless/02568_json_array_length.sql diff --git a/tests/queries/0_stateless/02667_order_by_aggregation_result.reference b/tests/queries/0_stateless/02569_order_by_aggregation_result.reference similarity index 100% rename from tests/queries/0_stateless/02667_order_by_aggregation_result.reference rename to tests/queries/0_stateless/02569_order_by_aggregation_result.reference diff --git a/tests/queries/0_stateless/02667_order_by_aggregation_result.sql b/tests/queries/0_stateless/02569_order_by_aggregation_result.sql similarity index 100% rename from tests/queries/0_stateless/02667_order_by_aggregation_result.sql rename to tests/queries/0_stateless/02569_order_by_aggregation_result.sql diff --git 
a/tests/queries/0_stateless/02668_fallback_from_async_insert.reference b/tests/queries/0_stateless/02570_fallback_from_async_insert.reference similarity index 100% rename from tests/queries/0_stateless/02668_fallback_from_async_insert.reference rename to tests/queries/0_stateless/02570_fallback_from_async_insert.reference diff --git a/tests/queries/0_stateless/02668_fallback_from_async_insert.sh b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh similarity index 100% rename from tests/queries/0_stateless/02668_fallback_from_async_insert.sh rename to tests/queries/0_stateless/02570_fallback_from_async_insert.sh diff --git a/tests/queries/0_stateless/02669_local_desc_abort_on_twitter_json.reference b/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.reference similarity index 100% rename from tests/queries/0_stateless/02669_local_desc_abort_on_twitter_json.reference rename to tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.reference diff --git a/tests/queries/0_stateless/02669_local_desc_abort_on_twitter_json.sh b/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.sh similarity index 100% rename from tests/queries/0_stateless/02669_local_desc_abort_on_twitter_json.sh rename to tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.sh diff --git a/tests/queries/0_stateless/02670_max_intersections.reference b/tests/queries/0_stateless/02572_max_intersections.reference similarity index 100% rename from tests/queries/0_stateless/02670_max_intersections.reference rename to tests/queries/0_stateless/02572_max_intersections.reference diff --git a/tests/queries/0_stateless/02670_max_intersections.sql b/tests/queries/0_stateless/02572_max_intersections.sql similarity index 100% rename from tests/queries/0_stateless/02670_max_intersections.sql rename to tests/queries/0_stateless/02572_max_intersections.sql diff --git a/tests/queries/0_stateless/02671_quantile_fuse_msan.reference b/tests/queries/0_stateless/02573_quantile_fuse_msan.reference similarity index 100% rename from tests/queries/0_stateless/02671_quantile_fuse_msan.reference rename to tests/queries/0_stateless/02573_quantile_fuse_msan.reference diff --git a/tests/queries/0_stateless/02671_quantile_fuse_msan.sql b/tests/queries/0_stateless/02573_quantile_fuse_msan.sql similarity index 100% rename from tests/queries/0_stateless/02671_quantile_fuse_msan.sql rename to tests/queries/0_stateless/02573_quantile_fuse_msan.sql diff --git a/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.reference b/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.reference similarity index 100% rename from tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.reference rename to tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.reference diff --git a/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.sql b/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.sql similarity index 100% rename from tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.sql rename to tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.sql diff --git a/tests/queries/0_stateless/02673_map_hashing_msan.reference b/tests/queries/0_stateless/02575_map_hashing_msan.reference similarity index 100% rename from tests/queries/0_stateless/02673_map_hashing_msan.reference rename to tests/queries/0_stateless/02575_map_hashing_msan.reference diff --git a/tests/queries/0_stateless/02673_map_hashing_msan.sql 
b/tests/queries/0_stateless/02575_map_hashing_msan.sql similarity index 100% rename from tests/queries/0_stateless/02673_map_hashing_msan.sql rename to tests/queries/0_stateless/02575_map_hashing_msan.sql diff --git a/tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.reference b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference similarity index 100% rename from tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.reference rename to tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference diff --git a/tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.sql b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.sql similarity index 100% rename from tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.sql rename to tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.sql diff --git a/tests/queries/0_stateless/02660_rewrite_array_exists_to_has.reference b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference similarity index 100% rename from tests/queries/0_stateless/02660_rewrite_array_exists_to_has.reference rename to tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference diff --git a/tests/queries/0_stateless/02660_rewrite_array_exists_to_has.sql b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sh similarity index 100% rename from tests/queries/0_stateless/02660_rewrite_array_exists_to_has.sql rename to tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sh diff --git a/tests/queries/0_stateless/25338_analyzer_array_join_calc_twice.reference b/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.reference similarity index 100% rename from tests/queries/0_stateless/25338_analyzer_array_join_calc_twice.reference rename to tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.reference diff --git a/tests/queries/0_stateless/25338_analyzer_array_join_calc_twice.sql b/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.sql similarity index 100% rename from tests/queries/0_stateless/25338_analyzer_array_join_calc_twice.sql rename to tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.sql diff --git a/tests/queries/0_stateless/02661_keepermap_delete_update.reference b/tests/queries/0_stateless/02577_keepermap_delete_update.reference similarity index 100% rename from tests/queries/0_stateless/02661_keepermap_delete_update.reference rename to tests/queries/0_stateless/02577_keepermap_delete_update.reference diff --git a/tests/queries/0_stateless/02661_keepermap_delete_update.sql b/tests/queries/0_stateless/02577_keepermap_delete_update.sql similarity index 100% rename from tests/queries/0_stateless/02661_keepermap_delete_update.sql rename to tests/queries/0_stateless/02577_keepermap_delete_update.sql diff --git a/tests/queries/0_stateless/25338_ipv4_codec_t64.reference b/tests/queries/0_stateless/02578_ipv4_codec_t64.reference similarity index 100% rename from tests/queries/0_stateless/25338_ipv4_codec_t64.reference rename to tests/queries/0_stateless/02578_ipv4_codec_t64.reference diff --git a/tests/queries/0_stateless/25338_ipv4_codec_t64.sql b/tests/queries/0_stateless/02578_ipv4_codec_t64.sql similarity index 100% rename from tests/queries/0_stateless/25338_ipv4_codec_t64.sql rename to tests/queries/0_stateless/02578_ipv4_codec_t64.sql diff --git a/tests/queries/0_stateless/02661_parameterized_rename_queries.reference b/tests/queries/0_stateless/02578_parameterized_rename_queries.reference 
similarity index 100%
rename from tests/queries/0_stateless/02661_parameterized_rename_queries.reference
rename to tests/queries/0_stateless/02578_parameterized_rename_queries.reference
diff --git a/tests/queries/0_stateless/02661_parameterized_rename_queries.sql b/tests/queries/0_stateless/02578_parameterized_rename_queries.sql
similarity index 100%
rename from tests/queries/0_stateless/02661_parameterized_rename_queries.sql
rename to tests/queries/0_stateless/02578_parameterized_rename_queries.sql
diff --git a/tests/queries/0_stateless/25339_fill_empty_chunk.reference b/tests/queries/0_stateless/02579_fill_empty_chunk.reference
similarity index 100%
rename from tests/queries/0_stateless/25339_fill_empty_chunk.reference
rename to tests/queries/0_stateless/02579_fill_empty_chunk.reference
diff --git a/tests/queries/0_stateless/25339_fill_empty_chunk.sql b/tests/queries/0_stateless/02579_fill_empty_chunk.sql
similarity index 100%
rename from tests/queries/0_stateless/25339_fill_empty_chunk.sql
rename to tests/queries/0_stateless/02579_fill_empty_chunk.sql
diff --git a/tests/queries/0_stateless/02661_parameterized_replace.reference b/tests/queries/0_stateless/02579_parameterized_replace.reference
similarity index 100%
rename from tests/queries/0_stateless/02661_parameterized_replace.reference
rename to tests/queries/0_stateless/02579_parameterized_replace.reference
diff --git a/tests/queries/0_stateless/02661_parameterized_replace.sql b/tests/queries/0_stateless/02579_parameterized_replace.sql
similarity index 100%
rename from tests/queries/0_stateless/02661_parameterized_replace.sql
rename to tests/queries/0_stateless/02579_parameterized_replace.sql
diff --git a/tests/queries/0_stateless/25339_like_substring_search_bug.reference b/tests/queries/0_stateless/02580_like_substring_search_bug.reference
similarity index 100%
rename from tests/queries/0_stateless/25339_like_substring_search_bug.reference
rename to tests/queries/0_stateless/02580_like_substring_search_bug.reference
diff --git a/tests/queries/0_stateless/25339_like_substring_search_bug.sql b/tests/queries/0_stateless/02580_like_substring_search_bug.sql
similarity index 100%
rename from tests/queries/0_stateless/25339_like_substring_search_bug.sql
rename to tests/queries/0_stateless/02580_like_substring_search_bug.sql

From 8a977a2b8318a5dfd74895527f6d78f3a493c6b7 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 27 Feb 2023 20:01:48 +0000
Subject: [PATCH 74/83] Fix extension typo

---
 ...ray_exists_to_has.sh => 02576_rewrite_array_exists_to_has.sql} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{02576_rewrite_array_exists_to_has.sh => 02576_rewrite_array_exists_to_has.sql} (100%)

diff --git a/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sh b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sql
similarity index 100%
rename from tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sh
rename to tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sql

From 69fa64e85295272ae3eed1bef2ef70accaa5c4df Mon Sep 17 00:00:00 2001
From: Dan Roscigno
Date: Mon, 27 Feb 2023 15:16:18 -0500
Subject: [PATCH 75/83] Add example of arrayStringConcat()

closes https://github.com/ClickHouse/clickhouse-docs/issues/124
---
 .../functions/splitting-merging-functions.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 7cad6b2fbbf..6015bb79b87 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -226,6 +226,17 @@ SELECT splitByNonAlpha('  1!  a,   b.  ');
 
 Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default. Returns the string.
 
+**Example**
+
+``` sql
+SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString;
+```
+```text
+┌─DateString──────────┐
+│ 12/05/2021 12:50:00 │
+└─────────────────────┘
+```
+
 ## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
 
 Selects substrings of consecutive bytes from the ranges a-z and A-Z.Returns an array of substrings.
@@ -364,4 +375,4 @@ Result:
 ┌─tokens────────────────────────────┐
 │ ['test1','test2','test3','test4'] │
 └───────────────────────────────────┘
-```
\ No newline at end of file
+```
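The example added by this patch covers only the two-argument form. Since the docs text it lands next to says `separator` defaults to an empty string, a companion query for the one-argument form would behave as below (a sketch for illustration, not part of the patch):

```sql
-- Omitting the separator falls back to the documented empty-string default.
SELECT arrayStringConcat(['12/05/2021', '12:50:00']) AS NoSeparator;

-- ┌─NoSeparator────────┐
-- │ 12/05/202112:50:00 │
-- └────────────────────┘
```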

From f1e270b755c1f4f4bc31895962056b5cd1b12424 Mon Sep 17 00:00:00 2001
From: Dan Roscigno
Date: Mon, 27 Feb 2023 15:41:52 -0500
Subject: [PATCH 76/83] Add link to Howto from Materialized view ref.

closes https://github.com/ClickHouse/clickhouse-docs/issues/96
---
 docs/en/sql-reference/statements/create/view.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 527b31b36a4..acdede3c673 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
 CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
 ```
 
+:::tip
+Here is a step by step guide on using [Materialized views](docs/en/guides/developer/cascading-materialized-views.md).
+:::
+
 Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
 
 When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data.
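The syntax block this tip sits next to packs two storage modes into one grammar line, which is easy to miss. A minimal sketch of both forms (all table and view names here are hypothetical, not from the patch):

```sql
-- A hypothetical source table, only so the statements below are runnable.
CREATE TABLE src (key UInt64) ENGINE = MergeTree ORDER BY key;

-- Without TO the view owns its storage, so ENGINE is mandatory.
CREATE MATERIALIZED VIEW mv_inline
ENGINE = MergeTree ORDER BY key
AS SELECT key, count() AS cnt FROM src GROUP BY key;

-- With TO, rows are written into a pre-existing target table instead.
CREATE TABLE target (key UInt64, cnt UInt64) ENGINE = MergeTree ORDER BY key;
CREATE MATERIALIZED VIEW mv_to_target TO target
AS SELECT key, count() AS cnt FROM src GROUP BY key;
```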

From 69cdf401a4adb15ae8680b83e4a7ca5283d7b17a Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 27 Feb 2023 22:29:16 +0100
Subject: [PATCH 77/83] Add test for materialized view with materialized postgresql

---
 .../test.py | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index 68c7cb96b71..ba75177b89a 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -628,6 +628,27 @@ def test_table_override(started_cluster):
     assert_eq_with_retry(instance, query, expected)
 
 
+def test_materialized_view(started_cluster):
+    cursor = pg_manager.get_db_cursor()
+    cursor.execute(f"DROP TABLE IF EXISTS test_table")
+    cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)")
+    cursor.execute(f"INSERT INTO test_table SELECT 1, 2")
+    instance.query("DROP DATABASE IF EXISTS test_database")
+    instance.query(
+        "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'"
+    )
+    check_tables_are_synchronized(instance, "test_table")
+    instance.query("DROP TABLE IF EXISTS mv")
+    instance.query(
+        "CREATE MATERIALIZED VIEW mv ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT * FROM test_database.test_table"
+    )
+    assert "1\t2" == instance.query("SELECT * FROM mv").strip()
+    cursor.execute(f"INSERT INTO test_table SELECT 3, 4")
+    check_tables_are_synchronized(instance, "test_table")
+    assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv").strip()
+    pg_manager.drop_materialized_db()
+
+
 if __name__ == "__main__":
     cluster.start()
     input("Cluster created, press any key to destroy...")

From f4a8b099de9f9ac9fe52ef32df85b4fb0b90cfc5 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 27 Feb 2023 21:36:21 +0000
Subject: [PATCH 78/83] allow IPv4 in range()

---
 src/Functions/array/range.cpp | 13 +++++++++----
 .../queries/0_stateless/02674_range_ipv4.reference | 3 +++
 tests/queries/0_stateless/02674_range_ipv4.sql | 3 +++
 3 files changed, 15 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/02674_range_ipv4.reference
 create mode 100644 tests/queries/0_stateless/02674_range_ipv4.sql

diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp
index dc09facb81b..f1f0fef8fd9 100644
--- a/src/Functions/array/range.cpp
+++ b/src/Functions/array/range.cpp
@@ -55,14 +55,19 @@ private:
                 getName(), arguments.size());
         }
 
-        for (const auto & arg : arguments)
+        DataTypes arg_types;
+        for (size_t i = 0, size = arguments.size(); i < size; ++i)
         {
-            if (!isInteger(arg))
+            if (i < 2 && WhichDataType(arguments[i]).isIPv4())
+                arg_types.emplace_back(std::make_shared<DataTypeUInt32>());
+            else if (isInteger(arguments[i]))
+                arg_types.push_back(arguments[i]);
+            else
                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                                 "Illegal type {} of argument of function {}",
-                                arg->getName(), getName());
+                                arguments[i]->getName(), getName());
         }
 
-        DataTypePtr common_type = getLeastSupertype(arguments);
+        DataTypePtr common_type = getLeastSupertype(arg_types);
         return std::make_shared<DataTypeArray>(common_type);
     }

diff --git a/tests/queries/0_stateless/02674_range_ipv4.reference b/tests/queries/0_stateless/02674_range_ipv4.reference
new file mode 100644
index 00000000000..76fc0c45bd3
--- /dev/null
+++ b/tests/queries/0_stateless/02674_range_ipv4.reference
@@ -0,0 +1,3 @@
+[2887712768,2887712769,2887712770,2887712771,2887712772,2887712773,2887712774,2887712775,2887712776,2887712777]
+[2887712768,2887712769,2887712770,2887712771,2887712772,2887712773,2887712774,2887712775,2887712776,2887712777]
+[2887712768,2887712769,2887712770,2887712771,2887712772,2887712773,2887712774,2887712775,2887712776,2887712777]
diff --git a/tests/queries/0_stateless/02674_range_ipv4.sql b/tests/queries/0_stateless/02674_range_ipv4.sql
new file mode 100644
index 00000000000..1241b727014
--- /dev/null
+++ b/tests/queries/0_stateless/02674_range_ipv4.sql
@@ -0,0 +1,3 @@
+SELECT range(toIPv4('172.31.0.0'), toIPv4('172.31.0.10'));
+SELECT range(2887712768, toIPv4('172.31.0.10'));
+SELECT range(toIPv4('172.31.0.0'), 2887712778);
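The new tests read more easily once you see that `toIPv4('172.31.0.0')` and the literal `2887712768` are the same UInt32 value: 172·2^24 + 31·2^16 = 2887712768, which is exactly the first element in the reference file. A quick sanity check along the same lines (my arithmetic, not part of the patch):

```sql
-- 172.31.0.0 reinterpreted as UInt32: 172*2^24 + 31*2^16 = 2887712768
SELECT range(toIPv4('172.31.0.0'), toIPv4('172.31.0.3')) AS r;
-- r = [2887712768, 2887712769, 2887712770]

-- Mixed endpoints promote the same way, so the half-open interval length
-- matches the ten elements per row in the reference file:
SELECT length(range(toIPv4('172.31.0.0'), 2887712778)) AS n;  -- n = 10
```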

From ab44740efb829d6d3eaa4663d6b89cf3c5dc3b1c Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Tue, 28 Feb 2023 00:26:11 +0100
Subject: [PATCH 79/83] Enable perf tests added in #45364 (#46623)

---
 tests/performance/aggregation_by_partitions.xml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/performance/aggregation_by_partitions.xml b/tests/performance/aggregation_by_partitions.xml
index dbeaf3ce6aa..403f94d8ac8 100644
--- a/tests/performance/aggregation_by_partitions.xml
+++ b/tests/performance/aggregation_by_partitions.xml
@@ -1,9 +1,8 @@
 <test>
     <settings>
-
-        <!-- <allow_aggregate_partitions_independently>1</allow_aggregate_partitions_independently> -->
-        <!-- <force_aggregate_partitions_independently>1</force_aggregate_partitions_independently> -->
-        <!-- <max_number_of_partitions_for_independent_aggregation>256</max_number_of_partitions_for_independent_aggregation> -->
+        <allow_aggregate_partitions_independently>1</allow_aggregate_partitions_independently>
+        <force_aggregate_partitions_independently>1</force_aggregate_partitions_independently>
+        <max_number_of_partitions_for_independent_aggregation>256</max_number_of_partitions_for_independent_aggregation>
         <max_memory_usage>0</max_memory_usage>
         <max_partitions_per_insert_block>256</max_partitions_per_insert_block>
     </settings>

From 3444059649c607ba0b9735fbf80cfef4779f1863 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Tue, 28 Feb 2023 00:42:52 +0100
Subject: [PATCH 80/83] Add thevar1able to trusted contributors (#46998)

* Add thevar1able to trusted contributors

* Automatic style fix

---------

Co-authored-by: robot-clickhouse
---
 tests/ci/workflow_approve_rerun_lambda/app.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py
index a4a5a013c36..decf6ce0393 100644
--- a/tests/ci/workflow_approve_rerun_lambda/app.py
+++ b/tests/ci/workflow_approve_rerun_lambda/app.py
@@ -123,6 +123,7 @@ TRUSTED_CONTRIBUTORS = {
         "tonickkozlov",  # Cloudflare
         "tylerhannan",  # ClickHouse Employee
         "myrrc",  # Mike Kot, DoubleCloud
+        "thevar1able",  # ClickHouse Employee
     ]
 }
 

From f43e00c3cacf019d776c77b83cd2a8c10971e789 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Tue, 28 Feb 2023 11:32:33 +0100
Subject: [PATCH 81/83] Add order by to test to fix flakyness

---
 .../test_postgresql_replica_database_engine_2/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index ba75177b89a..33796336550 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -645,7 +645,7 @@ def test_materialized_view(started_cluster):
     assert "1\t2" == instance.query("SELECT * FROM mv").strip()
     cursor.execute(f"INSERT INTO test_table SELECT 3, 4")
     check_tables_are_synchronized(instance, "test_table")
-    assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv").strip()
+    assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv ORDER BY 1, 2").strip()
     pg_manager.drop_materialized_db()
 
 
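The one-line fix in PATCH 81 works because a bare `SELECT *` over a MergeTree table carries no ordering guarantee: once the second replicated insert lands in a separate part, either row may come back first. Illustrated as a hedged sketch of the failure mode, not the test itself:

```sql
-- Row order across MergeTree parts is unspecified, so after the second
-- replicated insert this may yield the (3, 4) row before (1, 2):
SELECT * FROM mv;
-- Pinning the order makes the test's string comparison deterministic:
SELECT * FROM mv ORDER BY 1, 2;   -- always (1, 2) then (3, 4)
```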
Shiryaev" Date: Tue, 28 Feb 2023 12:00:08 +0100 Subject: [PATCH 82/83] Use /etc/default/clickhouse in systemd too --- packages/clickhouse-server.service | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 037be826b97..bff2bf8fc2d 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -24,6 +24,8 @@ RuntimeDirectory=%p ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid # Minus means that this file is optional. EnvironmentFile=-/etc/default/%p +# Bring back /etc/default/clickhouse for backward compatibility +EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE From 57f451ad2d35f349a447efd845f719ede8ffdfc9 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Tue, 28 Feb 2023 09:47:32 -0500 Subject: [PATCH 83/83] Remove table from intro page The table was not kept up to date, removing as the list of table functions is in the nav. In the future the list will be auto generated below the intro material. Closes #46944 --- .../en/sql-reference/table-functions/index.md | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index 94b23bc695c..b49c2f8da20 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -23,23 +23,3 @@ You can use table functions in: :::warning You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. ::: - -| Function | Description | -|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| -| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. | -| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. | -| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. | -| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. | -| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. | -| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. | -| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. | -| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. | -| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. | -| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. 

From 57f451ad2d35f349a447efd845f719ede8ffdfc9 Mon Sep 17 00:00:00 2001
From: Dan Roscigno
Date: Tue, 28 Feb 2023 09:47:32 -0500
Subject: [PATCH 83/83] Remove table from intro page

The table was not kept up to date, removing as the list of table
functions is in the nav. In the future the list will be auto generated
below the intro material.

Closes #46944
---
 .../en/sql-reference/table-functions/index.md | 20 -------------------
 1 file changed, 20 deletions(-)

diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md
index 94b23bc695c..b49c2f8da20 100644
--- a/docs/en/sql-reference/table-functions/index.md
+++ b/docs/en/sql-reference/table-functions/index.md
@@ -23,23 +23,3 @@ You can use table functions in:
 :::warning
 You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
 :::
-
-| Function | Description |
-|----------|-------------|
-| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
-| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
-| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
-| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
-| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. |
-| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
-| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
-| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
-| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
-| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
-| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
-| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
-
-:::note
-Only these table functions are enabled in readonly mode :
-null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
-:::
\ No newline at end of file
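One piece of the removed page is still worth a concrete illustration: the note listing which table functions keep working under `readonly`. A hedged sketch of what that whitelist means in practice (the function list is quoted from the removed note; behavior elsewhere may differ by version):

```sql
SET readonly = 1;

-- On the whitelist, so these still work in readonly mode:
SELECT * FROM numbers(3);               -- 0, 1, 2
SELECT * FROM values('n UInt8', 1, 2);  -- 1, 2

-- url() is not on the list, so a query like the following would be
-- rejected while readonly is in effect:
-- SELECT * FROM url('https://example.com/data.csv', 'CSV', 's String');
```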