From 68858635647c6cb86003db3971bf6e94d417a073 Mon Sep 17 00:00:00 2001
From: Maksym Sobolyev
Date: Wed, 8 Mar 2023 16:18:13 -0800
Subject: [PATCH 01/18] Use "SELECT FROM ONLY xyz", not "SELECT FROM xyz" while
 replicating PostgreSQL tables, to properly handle inherited tables.

Currently, it would fetch the same data twice - once from the child tables
and then from the parent table.

---
 src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index f9bfe1d174a..29314523860 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -354,7 +354,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection
     /// Load from snapshot, which will show table state before creation of replication slot.
     /// Already connected to needed database, no need to add it to query.
     auto quoted_name = doubleQuoteWithSchema(table_name);
-    query_str = fmt::format("SELECT * FROM {}", quoted_name);
+    query_str = fmt::format("SELECT * FROM ONLY {}", quoted_name);

     LOG_DEBUG(log, "Loading PostgreSQL table {}.{}", postgres_database, quoted_name);

     auto table_structure = fetchTableStructure(*tx, table_name);

From 7504e107acb4fc48a793fb542f17693b058b98c7 Mon Sep 17 00:00:00 2001
From: serxa
Date: Tue, 21 Mar 2023 22:11:19 +0000
Subject: [PATCH 02/18] Fix CPU usage counters in segmentator thread

---
 .../Formats/Impl/ParallelParsingInputFormat.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
index 5ba32251a71..5881854571c 100644
--- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
@@ -18,6 +18,10 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr
     CurrentThread::attachToGroup(thread_group);

     setThreadName("Segmentator");
+
+    Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE};
+    UInt64 last_profile_events_update_time = 0;
+
    try
    {
        while (!parsing_finished)
@@ -50,6 +54,15 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr

            if (!have_more_data)
                break;
+
+            // Segmentator thread can be long-living, so we have to manually update performance counters for CPU progress to be correct
+            constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
+            UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
+            if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
+            {
+                CurrentThread::updatePerformanceCounters();
+                last_profile_events_update_time = total_elapsed_microseconds;
+            }
        }
    }
    catch (...)
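The pattern patch 02 introduces above, and which patch 03 below centralizes as `ThreadStatus::updatePerformanceCountersIfNeeded()`, is a simple rate limiter: the rusage/taskstats syscalls behind `updatePerformanceCounters()` run at most once per 10 ms of wall-clock time, so the hot loop pays only one cheap clock read per iteration. A minimal self-contained sketch of the same idea, using `std::chrono` in place of ClickHouse's `Stopwatch` (the `PeriodicUpdater` class and its names are illustrative, not ClickHouse API):

```cpp
#include <chrono>
#include <cstdint>

/// Throttles an expensive refresh to at most once per period.
/// Mirrors the Stopwatch / last-update-time pair added in the patch.
class PeriodicUpdater
{
public:
    template <typename F>
    void updateIfNeeded(F && expensive_update)
    {
        using namespace std::chrono;
        constexpr uint64_t period_microseconds = 10 * 1000; // 10 milliseconds

        const uint64_t elapsed = static_cast<uint64_t>(
            duration_cast<microseconds>(steady_clock::now() - start).count());
        if (last_update_time + period_microseconds < elapsed)
        {
            expensive_update();         // e.g. the rusage/taskstats syscalls
            last_update_time = elapsed; // remember when we last paid the cost
        }
    }

private:
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    uint64_t last_update_time = 0; // microseconds since `start`
};

int main()
{
    PeriodicUpdater updater;
    for (int i = 0; i < 1000000; ++i)  // hot loop, like the segmentator's
        updater.updateIfNeeded([] { /* refresh counters here */ });
}
```

Calling `updateIfNeeded()` on every iteration is cheap precisely because only the period check runs on the common path, which is also why the actual patches measure time with the coarse (and cheaper to read) `CLOCK_MONOTONIC_COARSE` clock.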
From f3c12b854386f9c57fe5f8a40193eaae3471a9f8 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 22 Mar 2023 13:51:07 +0000 Subject: [PATCH 03/18] fix per-thread perf counters update --- src/Common/CurrentThread.cpp | 7 +++++++ src/Common/CurrentThread.h | 1 + src/Common/ThreadStatus.cpp | 11 +++++++++++ src/Common/ThreadStatus.h | 4 ++++ src/Common/mysqlxx/Pool.cpp | 14 +------------- .../Formats/Impl/ParallelParsingInputFormat.cpp | 11 +---------- src/QueryPipeline/ReadProgressCallback.cpp | 15 +++------------ src/QueryPipeline/ReadProgressCallback.h | 6 ++---- 8 files changed, 30 insertions(+), 39 deletions(-) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 7fd82426522..6ec46d6508c 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -25,6 +25,13 @@ void CurrentThread::updatePerformanceCounters() current_thread->updatePerformanceCounters(); } +void CurrentThread::updatePerformanceCountersIfNeeded() +{ + if (unlikely(!current_thread)) + return; + current_thread->updatePerformanceCountersIfNeeded(); +} + bool CurrentThread::isInitialized() { return current_thread; diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index 3b16163b1ba..373f7aa3e10 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -53,6 +53,7 @@ public: /// Makes system calls to update ProfileEvents that contain info from rusage and taskstats static void updatePerformanceCounters(); + static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker() diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index aa1690890d8..91c695216a8 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -217,6 +217,17 @@ void ThreadStatus::updatePerformanceCounters() } } +void ThreadStatus::updatePerformanceCountersIfNeeded() +{ + constexpr UInt64 performance_counters_update_period_microseconds = 10 * 1000; // 10 milliseconds + UInt64 total_elapsed_microseconds = stopwatch.elapsedMicroseconds(); + if (last_performance_counters_update_time + performance_counters_update_period_microseconds < total_elapsed_microseconds) + { + CurrentThread::updatePerformanceCounters(); + last_performance_counters_update_time = total_elapsed_microseconds; + } +} + void ThreadStatus::onFatalError() { if (fatal_error_callback) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 77c924f9650..79474f292ec 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,8 @@ private: /// Use ptr not to add extra dependencies in the header std::unique_ptr last_rusage; std::unique_ptr taskstats; + Stopwatch stopwatch{CLOCK_MONOTONIC_COARSE}; + UInt64 last_performance_counters_update_time = 0; /// See setInternalThread() bool internal_thread = false; @@ -265,6 +268,7 @@ public: /// Update several ProfileEvents counters void updatePerformanceCounters(); + void updatePerformanceCountersIfNeeded(); /// Update ProfileEvents and dumps info to system.query_thread_log void finalizePerformanceCounters(); diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index 6cd1ae8b399..6438d76cc3a 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -10,22 +10,10 @@ #include #include #include +#include #include -namespace -{ - -inline uint64_t clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) -{ - 
struct timespec ts; - clock_gettime(clock_type, &ts); - return uint64_t(ts.tv_sec * 1000000000LL + ts.tv_nsec); -} - -} - - namespace mysqlxx { diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 5881854571c..a2e5074efb1 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -19,9 +19,6 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr setThreadName("Segmentator"); - Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; - UInt64 last_profile_events_update_time = 0; - try { while (!parsing_finished) @@ -56,13 +53,7 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr break; // Segmentator thread can be long-living, so we have to manually update performance counters for CPU progress to be correct - constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds - UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds(); - if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) - { - CurrentThread::updatePerformanceCounters(); - last_profile_events_update_time = total_elapsed_microseconds; - } + CurrentThread::updatePerformanceCountersIfNeeded(); } } catch (...) diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index 6692b0f96bd..0f50d56f1a5 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -112,22 +112,13 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c size_t total_rows = progress.total_rows_to_read; - constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds - UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds(); + CurrentThread::updatePerformanceCountersIfNeeded(); - std::lock_guard lock(last_profile_events_update_time_mutex); - { - if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) - { - /// TODO: Should be done in PipelineExecutor. - CurrentThread::updatePerformanceCounters(); - last_profile_events_update_time = total_elapsed_microseconds; - } - } + std::lock_guard lock(limits_and_quotas_mutex); /// TODO: Should be done in PipelineExecutor. for (const auto & limits : storage_limits) - limits.local_limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); + limits.local_limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_stopwatch.elapsedMicroseconds()); if (quota) quota->used({QuotaType::READ_ROWS, value.read_rows}, {QuotaType::READ_BYTES, value.read_bytes}); diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index c8f0d4cf537..08f2f9fc99b 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -38,10 +38,8 @@ private: /// The approximate total number of rows to read. For progress bar. std::atomic_size_t total_rows_approx = 0; - Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time. - /// According to total_stopwatch in microseconds. 
- UInt64 last_profile_events_update_time = 0; - std::mutex last_profile_events_update_time_mutex; + std::mutex limits_and_quotas_mutex; + Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time bool update_profile_events = true; }; From 28ca2de86d3982b1b16aa292d5df6fe75ecda7d3 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 24 Mar 2023 14:58:50 +0000 Subject: [PATCH 04/18] fix --- src/Common/ThreadStatus.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 91c695216a8..16ce73cda20 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -223,7 +223,7 @@ void ThreadStatus::updatePerformanceCountersIfNeeded() UInt64 total_elapsed_microseconds = stopwatch.elapsedMicroseconds(); if (last_performance_counters_update_time + performance_counters_update_period_microseconds < total_elapsed_microseconds) { - CurrentThread::updatePerformanceCounters(); + updatePerformanceCounters(); last_performance_counters_update_time = total_elapsed_microseconds; } } From 98c9b1f75cc9452bedc129c3e41fbe794f42a0b1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 24 Mar 2023 15:09:27 +0000 Subject: [PATCH 05/18] Automatic style fix --- docker/test/performance-comparison/perf.py | 2 ++ docker/test/performance-comparison/report.py | 1 - tests/ci/clickhouse_helper.py | 1 - tests/ci/docker_images_check.py | 1 - tests/ci/get_previous_release_tag.py | 1 - tests/ci/report.py | 2 +- tests/integration/helpers/cluster.py | 3 +-- tests/integration/helpers/network.py | 2 -- .../pytest_xdist_logging_to_separate_files.py | 1 + .../test_detach_part_wrong_partition_id.py | 1 - .../test_cluster_copier/test_three_nodes.py | 1 - .../test_cluster_copier/test_two_nodes.py | 1 - tests/integration/test_composable_protocols/test.py | 1 - .../test_create_query_constraints/test.py | 2 -- .../common.py | 1 - tests/integration/test_disks_app_func/test.py | 1 - .../test_distributed_ddl_parallel/test.py | 1 + tests/integration/test_fetch_memory_usage/test.py | 1 - .../scripts/stress_test.py | 1 - tests/integration/test_jbod_balancer/test.py | 1 - .../test_keeper_and_access_storage/test.py | 1 + tests/integration/test_keeper_back_to_back/test.py | 2 +- tests/integration/test_keeper_persistent_log/test.py | 1 - .../test_keeper_zookeeper_converter/test.py | 1 - tests/integration/test_merge_tree_load_parts/test.py | 6 +++--- .../s3_endpoint/endpoint.py | 1 - .../test_merge_tree_settings_constraints/test.py | 1 - .../test_old_parts_finally_removed/test.py | 1 - tests/integration/test_partition/test.py | 4 +++- tests/integration/test_password_constraints/test.py | 1 - tests/integration/test_read_only_table/test.py | 1 - .../test_reload_auxiliary_zookeepers/test.py | 1 - .../s3_endpoint/endpoint.py | 1 + tests/integration/test_s3_with_proxy/test.py | 1 + .../integration/test_ssl_cert_authentication/test.py | 1 - tests/integration/test_storage_kafka/kafka_pb2.py | 1 - .../test_storage_kafka/message_with_repeated_pb2.py | 1 - tests/integration/test_storage_kafka/social_pb2.py | 1 - tests/integration/test_storage_kafka/test.py | 12 ++---------- tests/integration/test_storage_nats/nats_pb2.py | 1 - .../test_storage_postgresql_replica/test.py | 1 - .../test_storage_rabbitmq/rabbitmq_pb2.py | 1 - tests/integration/test_storage_rabbitmq/test.py | 3 --- tests/integration/test_storage_s3/test.py | 1 + .../test_storage_s3/test_invalid_env_credentials.py | 1 + tests/integration/test_system_merges/test.py | 1 - 
tests/integration/test_ttl_move/test.py | 2 +- tests/integration/test_zero_copy_fetch/test.py | 1 - utils/changelog-simple/format-changelog.py | 1 + utils/keeper-overload/keeper-overload.py | 2 +- 50 files changed, 23 insertions(+), 57 deletions(-) diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 65bf49c2914..7a4e6386d0d 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -26,6 +26,7 @@ logging.basicConfig( total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds + # Thread executor that does not hides exception that happens during function # execution, and rethrows it after join() class SafeThread(Thread): @@ -158,6 +159,7 @@ for e in subst_elems: available_parameters[name] = values + # Takes parallel lists of templates, substitutes them with all combos of # parameters. The set of parameters is determined based on the first list. # Note: keep the order of queries -- sometimes we have DROP IF EXISTS diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 782cf29863c..214f2d550b4 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -670,7 +670,6 @@ if args.report == "main": ) elif args.report == "all-queries": - print((header_template.format())) add_tested_commits() diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d60a9e6afd1..64b64896f66 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -141,7 +141,6 @@ def prepare_tests_results_for_clickhouse( report_url: str, check_name: str, ) -> List[dict]: - pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" base_ref = "master" head_ref = "master" diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 192d216614e..f2b1105b3b0 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -96,7 +96,6 @@ def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: def get_changed_docker_images( pr_info: PRInfo, images_dict: ImagesDict ) -> Set[DockerImage]: - if not images_dict: return set() diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index c6fe6cd5fb5..c2d279f7fec 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -51,7 +51,6 @@ def find_previous_release( for release in releases: if release.version < server_version: - # Check if the artifact exists on GitHub. # It can be not true for a short period of time # after creating a tag for a new release before uploading the packages. 
diff --git a/tests/ci/report.py b/tests/ci/report.py index 947fb33d905..ddee035d26f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -473,7 +473,7 @@ def create_build_html_report( commit_url: str, ) -> str: rows = "" - for (build_result, build_log_url, artifact_urls) in zip( + for build_result, build_log_url, artifact_urls in zip( build_results, build_logs_urls, artifact_urls_list ): row = "" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index dc5ada81995..a9a996e0a5f 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -63,6 +63,7 @@ DEFAULT_ENV_NAME = ".env" SANITIZER_SIGN = "==================" + # to create docker-compose env file def _create_env_file(path, variables): logging.debug(f"Env {variables} stored in {path}") @@ -1454,7 +1455,6 @@ class ClickHouseCluster: config_root_name="clickhouse", extra_configs=[], ) -> "ClickHouseInstance": - """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -3089,7 +3089,6 @@ class ClickHouseInstance: config_root_name="clickhouse", extra_configs=[], ): - self.name = name self.base_cmd = cluster.base_cmd self.docker_id = cluster.get_instance_docker_id(self.name) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index e408c9beec1..471aa2bdc2e 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -216,7 +216,6 @@ class _NetworkManager: container_exit_timeout=60, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): - self.container_expire_timeout = container_expire_timeout self.container_exit_timeout = container_exit_timeout @@ -232,7 +231,6 @@ class _NetworkManager: def _ensure_container(self): if self._container is None or self._container_expire_time <= time.time(): - for i in range(5): if self._container is not None: try: diff --git a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py index d424ad58fa4..370aa23a014 100644 --- a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py +++ b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py @@ -1,6 +1,7 @@ import logging import os.path + # Makes the parallel workers of pytest-xdist to log to separate files. # Without this function all workers will log to the same log file # and mix everything together making it much more difficult for troubleshooting. diff --git a/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py b/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py index 02fccfae4e5..a6f7a8653da 100644 --- a/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py +++ b/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py @@ -24,7 +24,6 @@ def start_cluster(): def test_detach_part_wrong_partition_id(start_cluster): - # Here we create table with partition by UUID. 
node_21_6.query( "create table tab (id UUID, value UInt32) engine = MergeTree PARTITION BY (id) order by tuple()" diff --git a/tests/integration/test_cluster_copier/test_three_nodes.py b/tests/integration/test_cluster_copier/test_three_nodes.py index 31d6c0448f4..e7d07757adb 100644 --- a/tests/integration/test_cluster_copier/test_three_nodes.py +++ b/tests/integration/test_cluster_copier/test_three_nodes.py @@ -19,7 +19,6 @@ cluster = ClickHouseCluster(__file__) def started_cluster(): global cluster try: - for name in ["first", "second", "third"]: cluster.add_instance( name, diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 10ab7d03b00..2b6fcf6cac2 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -19,7 +19,6 @@ cluster = ClickHouseCluster(__file__) def started_cluster(): global cluster try: - for name in ["first_of_two", "second_of_two"]: instance = cluster.add_instance( name, diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index bc87fea5296..df74cfffa54 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -63,7 +63,6 @@ def netcat(hostname, port, content): def test_connections(): - client = Client(server.ip_address, 9000, command=cluster.client_bin_path) assert client.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_create_query_constraints/test.py b/tests/integration/test_create_query_constraints/test.py index 8df043fd24b..33c41b4f161 100644 --- a/tests/integration/test_create_query_constraints/test.py +++ b/tests/integration/test_create_query_constraints/test.py @@ -25,7 +25,6 @@ def start_cluster(): def test_create_query_const_constraints(): - instance.query("CREATE USER u_const SETTINGS max_threads = 1 CONST") instance.query("GRANT ALL ON *.* TO u_const") @@ -57,7 +56,6 @@ def test_create_query_const_constraints(): def test_create_query_minmax_constraints(): - instance.query("CREATE USER u_minmax SETTINGS max_threads = 4 MIN 2 MAX 6") instance.query("GRANT ALL ON *.* TO u_minmax") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py index b38e81b0227..01addae2542 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py @@ -348,7 +348,6 @@ class RangedLayoutTester(BaseLayoutTester): self.layouts = LAYOUTS_RANGED def execute(self, layout_name, node): - if layout_name not in self.layout_to_dictionary: raise RuntimeError("Source doesn't support layout: {}".format(layout_name)) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 027ef8feed0..2428c53854e 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -7,7 +7,6 @@ import pytest def started_cluster(): global cluster try: - cluster = ClickHouseCluster(__file__) cluster.add_instance( "disks_app_test", main_configs=["config.xml"], with_minio=True diff --git a/tests/integration/test_distributed_ddl_parallel/test.py b/tests/integration/test_distributed_ddl_parallel/test.py index 6ebfe472e09..eb98dd3e230 100644 --- a/tests/integration/test_distributed_ddl_parallel/test.py +++ 
b/tests/integration/test_distributed_ddl_parallel/test.py @@ -10,6 +10,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) + # By default the exceptions that was throwed in threads will be ignored # (they will not mark the test as failed, only printed to stderr). # diff --git a/tests/integration/test_fetch_memory_usage/test.py b/tests/integration/test_fetch_memory_usage/test.py index a4371140150..7591cc0e8a9 100644 --- a/tests/integration/test_fetch_memory_usage/test.py +++ b/tests/integration/test_fetch_memory_usage/test.py @@ -18,7 +18,6 @@ def started_cluster(): def test_huge_column(started_cluster): - if ( node.is_built_with_thread_sanitizer() or node.is_built_with_memory_sanitizer() diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py index b8bafb3d0c1..fe69d72c1c7 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py @@ -13,7 +13,6 @@ number_of_iterations = 100 def perform_request(): - buffer = BytesIO() crl = pycurl.Curl() crl.setopt(pycurl.INTERFACE, client_ip) diff --git a/tests/integration/test_jbod_balancer/test.py b/tests/integration/test_jbod_balancer/test.py index e746698611a..df34a075d5a 100644 --- a/tests/integration/test_jbod_balancer/test.py +++ b/tests/integration/test_jbod_balancer/test.py @@ -45,7 +45,6 @@ def start_cluster(): def check_balance(node, table): - partitions = node.query( """ WITH diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index 6ec307f7082..0314825b6b7 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/keeper.xml"], stay_alive=True ) + # test that server is able to start @pytest.fixture(scope="module") def started_cluster(): diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 73fface02b4..b737ac284d2 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -546,7 +546,6 @@ def test_random_requests(started_cluster): def test_end_of_session(started_cluster): - fake_zk1 = None fake_zk2 = None genuine_zk1 = None @@ -685,6 +684,7 @@ def test_concurrent_watches(started_cluster): nonlocal watches_created nonlocal all_paths_created fake_zk.ensure_path(global_path + "/" + str(i)) + # new function each time def dumb_watch(event): nonlocal dumb_watch_triggered_counter diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 70cc14fe26d..4164ffb33d3 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -163,7 +163,6 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): - try: node_zk = None node_zk2 = None diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 063421bf922..de5a9416119 100644 
--- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -114,7 +114,6 @@ def start_clickhouse(): def copy_zookeeper_data(make_zk_snapshots): - if make_zk_snapshots: # force zookeeper to create snapshot generate_zk_snapshot() else: diff --git a/tests/integration/test_merge_tree_load_parts/test.py b/tests/integration/test_merge_tree_load_parts/test.py index 777b6f14fc6..dfbe00c8e28 100644 --- a/tests/integration/test_merge_tree_load_parts/test.py +++ b/tests/integration/test_merge_tree_load_parts/test.py @@ -148,17 +148,17 @@ def test_merge_tree_load_parts_corrupted(started_cluster): node1.query("SYSTEM WAIT LOADING PARTS mt_load_parts_2") def check_parts_loading(node, partition, loaded, failed, skipped): - for (min_block, max_block) in loaded: + for min_block, max_block in loaded: part_name = f"{partition}_{min_block}_{max_block}" assert node.contains_in_log(f"Loading Active part {part_name}") assert node.contains_in_log(f"Finished loading Active part {part_name}") - for (min_block, max_block) in failed: + for min_block, max_block in failed: part_name = f"{partition}_{min_block}_{max_block}" assert node.contains_in_log(f"Loading Active part {part_name}") assert not node.contains_in_log(f"Finished loading Active part {part_name}") - for (min_block, max_block) in skipped: + for min_block, max_block in skipped: part_name = f"{partition}_{min_block}_{max_block}" assert not node.contains_in_log(f"Loading Active part {part_name}") assert not node.contains_in_log(f"Finished loading Active part {part_name}") diff --git a/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py b/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py index b6567dfebc5..4613fdb850b 100644 --- a/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py +++ b/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py @@ -42,7 +42,6 @@ def delete(_bucket): @route("/<_bucket>/<_path:path>", ["GET", "POST", "PUT", "DELETE"]) def server(_bucket, _path): - # It's delete query for failed part if _path.endswith("delete"): response.set_header("Location", "http://minio1:9001/" + _bucket + "/" + _path) diff --git a/tests/integration/test_merge_tree_settings_constraints/test.py b/tests/integration/test_merge_tree_settings_constraints/test.py index 0bb0179108d..be6e2a31873 100644 --- a/tests/integration/test_merge_tree_settings_constraints/test.py +++ b/tests/integration/test_merge_tree_settings_constraints/test.py @@ -20,7 +20,6 @@ def start_cluster(): def test_merge_tree_settings_constraints(): - assert "Setting storage_policy should not be changed" in instance.query_and_get_error( f"CREATE TABLE wrong_table (number Int64) engine = MergeTree() ORDER BY number SETTINGS storage_policy = 'secret_policy'" ) diff --git a/tests/integration/test_old_parts_finally_removed/test.py b/tests/integration/test_old_parts_finally_removed/test.py index 108b72c5ccd..5347d433419 100644 --- a/tests/integration/test_old_parts_finally_removed/test.py +++ b/tests/integration/test_old_parts_finally_removed/test.py @@ -63,7 +63,6 @@ def test_part_finally_removed(started_cluster): ) for i in range(60): - if ( node1.query( "SELECT count() from system.parts WHERE table = 'drop_outdated_part'" diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index ae4393fc6f6..a34141c6189 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -528,7 
+528,9 @@ def test_make_clone_in_detached(started_cluster): ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] ) assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") - assert ["broken_all_0_0_0",] == sorted( + assert [ + "broken_all_0_0_0", + ] == sorted( instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") ) diff --git a/tests/integration/test_password_constraints/test.py b/tests/integration/test_password_constraints/test.py index e3628861b28..9cdff51caa1 100644 --- a/tests/integration/test_password_constraints/test.py +++ b/tests/integration/test_password_constraints/test.py @@ -17,7 +17,6 @@ def start_cluster(): def test_complexity_rules(start_cluster): - error_message = "DB::Exception: Invalid password. The password should: be at least 12 characters long, contain at least 1 numeric character, contain at least 1 lowercase character, contain at least 1 uppercase character, contain at least 1 special character" assert error_message in node.query_and_get_error( "CREATE USER u_1 IDENTIFIED WITH plaintext_password BY ''" diff --git a/tests/integration/test_read_only_table/test.py b/tests/integration/test_read_only_table/test.py index 914c6a99508..df084f9dbbd 100644 --- a/tests/integration/test_read_only_table/test.py +++ b/tests/integration/test_read_only_table/test.py @@ -49,7 +49,6 @@ def start_cluster(): def test_restart_zookeeper(start_cluster): - for table_id in range(NUM_TABLES): node1.query( f"INSERT INTO test_table_{table_id} VALUES (1), (2), (3), (4), (5);" diff --git a/tests/integration/test_reload_auxiliary_zookeepers/test.py b/tests/integration/test_reload_auxiliary_zookeepers/test.py index bb1455333fc..476c5dee99e 100644 --- a/tests/integration/test_reload_auxiliary_zookeepers/test.py +++ b/tests/integration/test_reload_auxiliary_zookeepers/test.py @@ -20,7 +20,6 @@ def start_cluster(): def test_reload_auxiliary_zookeepers(start_cluster): - node.query( "CREATE TABLE simple (date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', 'node') ORDER BY tuple() PARTITION BY date;" ) diff --git a/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py b/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py index d6a732cc681..1d33ca02f86 100644 --- a/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py +++ b/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from bottle import request, route, run, response + # Handle for MultipleObjectsDelete. @route("/<_bucket>", ["POST"]) def delete(_bucket): diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 1102d190a87..1af040c3c30 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -5,6 +5,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster + # Runs simple proxy resolver in python env container. 
def run_resolver(cluster): container_id = cluster.get_container_id("resolver") diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 7c62ca0d8b6..b3570b6e281 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -87,7 +87,6 @@ config = """ def execute_query_native(node, query, user, cert_name): - config_path = f"{SCRIPT_DIR}/configs/client.xml" formatted = config.format( diff --git a/tests/integration/test_storage_kafka/kafka_pb2.py b/tests/integration/test_storage_kafka/kafka_pb2.py index 7de1363bbf1..3e47af6c1e0 100644 --- a/tests/integration/test_storage_kafka/kafka_pb2.py +++ b/tests/integration/test_storage_kafka/kafka_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.kafka_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _KEYVALUEPAIR._serialized_start = 46 _KEYVALUEPAIR._serialized_end = 88 diff --git a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py index 4d1a23c0b43..3715a9bea04 100644 --- a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py +++ b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.message_with_repeated_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b"H\001" _MESSAGE._serialized_start = 62 diff --git a/tests/integration/test_storage_kafka/social_pb2.py b/tests/integration/test_storage_kafka/social_pb2.py index 830ade81d33..f91a7bd0539 100644 --- a/tests/integration/test_storage_kafka/social_pb2.py +++ b/tests/integration/test_storage_kafka/social_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.social_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _USER._serialized_start = 47 _USER._serialized_end = 90 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 51952ac1eb7..3a4fa6c6bfe 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -121,7 +121,7 @@ def kafka_create_topic( def kafka_delete_topic(admin_client, topic, max_retries=50): result = admin_client.delete_topics([topic]) - for (topic, e) in result.topic_error_codes: + for topic, e in result.topic_error_codes: if e == 0: logging.debug(f"Topic {topic} deleted") else: @@ -917,9 +917,7 @@ def describe_consumer_group(kafka_cluster, name): member_info["client_id"] = client_id member_info["client_host"] = client_host member_topics_assignment = [] - for (topic, partitions) in MemberAssignment.decode( - member_assignment - ).assignment: + for topic, partitions in MemberAssignment.decode(member_assignment).assignment: member_topics_assignment.append({"topic": topic, "partitions": partitions}) member_info["assignment"] = member_topics_assignment res.append(member_info) @@ -1537,7 +1535,6 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster): def test_kafka_materialized_view(kafka_cluster): - instance.query( """ DROP TABLE IF EXISTS test.view; @@ -2315,7 +2312,6 @@ def test_kafka_virtual_columns2(kafka_cluster): def 
test_kafka_produce_key_timestamp(kafka_cluster): - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2444,7 +2440,6 @@ def test_kafka_insert_avro(kafka_cluster): def test_kafka_produce_consume_avro(kafka_cluster): - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4031,7 +4026,6 @@ def test_kafka_predefined_configuration(kafka_cluster): # https://github.com/ClickHouse/ClickHouse/issues/26643 def test_issue26643(kafka_cluster): - # for backporting: # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") admin_client = KafkaAdminClient( @@ -4313,7 +4307,6 @@ def test_row_based_formats(kafka_cluster): "RowBinaryWithNamesAndTypes", "MsgPack", ]: - print(format_name) kafka_create_topic(admin_client, format_name) @@ -4438,7 +4431,6 @@ def test_block_based_formats_2(kafka_cluster): "ORC", "JSONCompactColumns", ]: - kafka_create_topic(admin_client, format_name) instance.query( diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py index 4330ff57950..e9e5cb72363 100644 --- a/tests/integration/test_storage_nats/nats_pb2.py +++ b/tests/integration/test_storage_nats/nats_pb2.py @@ -31,7 +31,6 @@ ProtoKeyValue = _reflection.GeneratedProtocolMessageType( _sym_db.RegisterMessage(ProtoKeyValue) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _PROTOKEYVALUE._serialized_start = 45 _PROTOKEYVALUE._serialized_end = 88 diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py index 5df8b9029e6..8666d7ae58c 100644 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ b/tests/integration/test_storage_postgresql_replica/test.py @@ -706,7 +706,6 @@ def test_abrupt_connection_loss_while_heavy_replication(started_cluster): def test_abrupt_server_restart_while_heavy_replication(started_cluster): - # FIXME (kssenii) temporary disabled if instance.is_built_with_sanitizer(): pytest.skip("Temporary disabled (FIXME)") diff --git a/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py b/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py index e017b4e66c2..a5845652eef 100644 --- a/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py +++ b/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.rabbitmq_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _KEYVALUEPROTO._serialized_start = 49 _KEYVALUEPROTO._serialized_end = 92 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 2e54f21787a..53b6c4109ef 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2864,7 +2864,6 @@ def test_rabbitmq_predefined_configuration(rabbitmq_cluster): def test_rabbitmq_msgpack(rabbitmq_cluster): - instance.query( """ drop table if exists rabbit_in; @@ -2908,7 +2907,6 @@ def test_rabbitmq_msgpack(rabbitmq_cluster): def test_rabbitmq_address(rabbitmq_cluster): - instance2.query( """ drop table if exists rabbit_in; @@ -3243,7 +3241,6 @@ def test_block_based_formats_2(rabbitmq_cluster): "ORC", "JSONCompactColumns", ]: - print(format_name) instance.query( diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 8b20727a7b5..4d493d9526b 
100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -18,6 +18,7 @@ MINIO_INTERNAL_PORT = 9001 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. diff --git a/tests/integration/test_storage_s3/test_invalid_env_credentials.py b/tests/integration/test_storage_s3/test_invalid_env_credentials.py index 2f5d9349904..aa6479a2ed3 100644 --- a/tests/integration/test_storage_s3/test_invalid_env_credentials.py +++ b/tests/integration/test_storage_s3/test_invalid_env_credentials.py @@ -11,6 +11,7 @@ MINIO_INTERNAL_PORT = 9001 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index 0a469bd7bbd..ff303afe19e 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -171,7 +171,6 @@ def test_mutation_simple(started_cluster, replicated): starting_block = 0 if replicated else 1 try: - for node in nodes: node.query( f"create table {name} (a Int64) engine={engine} order by tuple()" diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 99978cbf6dc..89824293320 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1863,7 +1863,7 @@ def test_ttl_move_if_exists(started_cluster, name, dest_type): ) ) - for (node, policy) in zip( + for node, policy in zip( [node1, node2], ["only_jbod_1", "small_jbod_with_external"] ): node.query( diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index b71752528d3..9b9aa5e0da7 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -16,7 +16,6 @@ cluster = ClickHouseCluster(__file__) @pytest.fixture(scope="module") def started_cluster(): try: - cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py index d5e1518270e..01f2694dd0f 100755 --- a/utils/changelog-simple/format-changelog.py +++ b/utils/changelog-simple/format-changelog.py @@ -20,6 +20,7 @@ parser.add_argument( ) args = parser.parse_args() + # This function mirrors the PR description checks in ClickhousePullRequestTrigger. # Returns False if the PR should not be mentioned changelog. 
def parse_one_pull_request(item): diff --git a/utils/keeper-overload/keeper-overload.py b/utils/keeper-overload/keeper-overload.py index bdb4563c713..0a059b10588 100755 --- a/utils/keeper-overload/keeper-overload.py +++ b/utils/keeper-overload/keeper-overload.py @@ -166,7 +166,7 @@ def main(args): keeper_bench_path = args.keeper_bench_path keepers = [] - for (port, server_id) in zip(PORTS, SERVER_IDS): + for port, server_id in zip(PORTS, SERVER_IDS): keepers.append( Keeper( keeper_binary_path, server_id, port, workdir, args.with_thread_fuzzer From a0fcf81abfed85bf10caa4eae28fe36a2f164a4d Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 28 Mar 2023 18:25:52 +0000 Subject: [PATCH 06/18] Support more ClickHouse types in MsgPack format --- docs/en/interfaces/formats.md | 38 +++--- .../Formats/Impl/MsgPackRowInputFormat.cpp | 128 +++++++++++++++--- .../Formats/Impl/MsgPackRowInputFormat.h | 4 +- .../Formats/Impl/MsgPackRowOutputFormat.cpp | 64 ++++++++- .../02594_msgpack_more_types.reference | 2 + .../0_stateless/02594_msgpack_more_types.sh | 11 ++ 6 files changed, 208 insertions(+), 39 deletions(-) create mode 100644 tests/queries/0_stateless/02594_msgpack_more_types.reference create mode 100755 tests/queries/0_stateless/02594_msgpack_more_types.sh diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 8430946a6c6..d82f7c4ea3f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2281,22 +2281,28 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data ### Data Types Matching {#data-types-matching-msgpack} -| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) | -|--------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------| -| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` | -| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` | -| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` | -| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` | -| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` | -| `float 32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float 32` | -| `float 64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `float 64` | -| `uint 16` | [Date](/docs/en/sql-reference/data-types/date.md) | `uint 16` | -| `uint 32` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `uint 32` | -| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` | -| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` | -| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | -| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` | -| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` | +| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) | 
+|--------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|----------------------------------| +| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` | +| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` | +| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` | +| `float 32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float 32` | +| `float 64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `float 64` | +| `uint 16` | [Date](/docs/en/sql-reference/data-types/date.md) | `uint 16` | +| `int 32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `int 32` | +| `uint 32` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `uint 32` | +| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` | +| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | `fixarray`, `array 16`, `array 32` | +| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | +| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` | +| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` | +| `int 8` | [Enum8](/docs/en/sql-reference/data-types/enum.md) | `int 8` | +| `bin 8` | [(U)Int128/(U)Int256](/docs/en/sql-reference/data-types/int-uint.md) | `bin 8` | +| `int 32` | [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `int 32` | +| `int 64` | [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `int 64` | +| `bin 8` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `bin 8 ` | Example: diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index bc41b512f79..7ce58b9991d 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include +#include #include #include #include @@ -64,7 +66,7 @@ void MsgPackVisitor::set_info(IColumn & column, DataTypePtr type, UInt8 & read) { info_stack.pop(); } - info_stack.push(Info{column, type, &read}); + info_stack.push(Info{column, type, false, std::nullopt, &read}); } void MsgPackVisitor::reset() @@ -137,16 +139,19 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) assert_cast(column).insertValue(value); break; } + case TypeIndex::Enum8: [[fallthrough]]; case TypeIndex::Int8: { assert_cast(column).insertValue(value); break; } + case TypeIndex::Enum16: [[fallthrough]]; case TypeIndex::Int16: { assert_cast(column).insertValue(value); break; } + case TypeIndex::Date32: [[fallthrough]]; case TypeIndex::Int32: { assert_cast(column).insertValue(static_cast(value)); @@ -167,11 +172,30 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) 
assert_cast(column).insertValue(IPv4(static_cast(value))); break; } + case TypeIndex::Decimal32: + { + assert_cast &>(column).insertValue(static_cast(value)); + break; + } + case TypeIndex::Decimal64: + { + assert_cast &>(column).insertValue(value); + break; + } default: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack integer into column with type {}.", type->getName()); } } +template +static void insertFromBinaryRepresentation(IColumn & column, DataTypePtr type, const char * value, size_t size) +{ + if (size != sizeof(typename ColumnType::ValueType)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected size of {} value: {}", type->getName(), size); + + assert_cast(column).insertData(value, size); +} + static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size, bool bin) { auto insert_func = [&](IColumn & column_, DataTypePtr type_) @@ -195,10 +219,33 @@ static void insertString(IColumn & column, DataTypePtr type, const char * value, return; } - if (isIPv6(type) && bin) + if (bin) { - assert_cast(column).insertData(value, size); - return; + switch (type->getTypeId()) + { + case TypeIndex::IPv6: + insertFromBinaryRepresentation(column, type, value, size); + return; + case TypeIndex::Int128: + insertFromBinaryRepresentation(column, type, value, size); + return; + case TypeIndex::UInt128: + insertFromBinaryRepresentation(column, type, value, size); + return; + case TypeIndex::Int256: + insertFromBinaryRepresentation(column, type, value, size); + return; + case TypeIndex::UInt256: + insertFromBinaryRepresentation(column, type, value, size); + return; + case TypeIndex::Decimal128: + insertFromBinaryRepresentation>(column, type, value, size); + return; + case TypeIndex::Decimal256: + insertFromBinaryRepresentation>(column, type, value, size); + return; + default:; + } } if (!isStringOrFixedString(type)) @@ -328,21 +375,47 @@ bool MsgPackVisitor::visit_boolean(bool value) bool MsgPackVisitor::start_array(size_t size) // NOLINT { - if (!isArray(info_stack.top().type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack array into column with type {}.", info_stack.top().type->getName()); + if (isArray(info_stack.top().type)) + { + auto nested_type = assert_cast(*info_stack.top().type).getNestedType(); + ColumnArray & column_array = assert_cast(info_stack.top().column); + ColumnArray::Offsets & offsets = column_array.getOffsets(); + IColumn & nested_column = column_array.getData(); + offsets.push_back(offsets.back() + size); + if (size > 0) + info_stack.push(Info{nested_column, nested_type, false, size, nullptr}); + } + else if (isTuple(info_stack.top().type)) + { + const auto & tuple_type = assert_cast(*info_stack.top().type); + const auto & nested_types = tuple_type.getElements(); + if (size != nested_types.size()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack array with size {} into Tuple column with {} elements", size, nested_types.size()); + + ColumnTuple & column_tuple = assert_cast(info_stack.top().column); + /// Push nested columns into stack in reverse order. 
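+        /// msgpack delivers array elements left to right, and each element is
+        /// followed by end_array_item(), which pops the top of info_stack; pushing
+        /// the tuple columns in reverse order leaves element 0 on top first, so each
+        /// incoming value lands in its matching tuple column in arrival order.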
+ for (ssize_t i = nested_types.size() - 1; i >= 0; --i) + info_stack.push(Info{column_tuple.getColumn(i), nested_types[i], true, std::nullopt, nullptr}); + } + else + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack array into column with type {}", info_stack.top().type->getName()); + } - auto nested_type = assert_cast(*info_stack.top().type).getNestedType(); - ColumnArray & column_array = assert_cast(info_stack.top().column); - ColumnArray::Offsets & offsets = column_array.getOffsets(); - IColumn & nested_column = column_array.getData(); - offsets.push_back(offsets.back() + size); - info_stack.push(Info{nested_column, nested_type, nullptr}); return true; } -bool MsgPackVisitor::end_array() // NOLINT + +bool MsgPackVisitor::end_array_item() // NOLINT { - info_stack.pop(); + if (info_stack.top().is_tuple_element) + info_stack.pop(); + else + { + --(*info_stack.top().array_size); + if (*info_stack.top().array_size == 0) + info_stack.pop(); + } return true; } @@ -360,7 +433,7 @@ bool MsgPackVisitor::start_map_key() // NOLINT { auto key_column = assert_cast(info_stack.top().column).getNestedData().getColumns()[0]; auto key_type = assert_cast(*info_stack.top().type).getKeyType(); - info_stack.push(Info{*key_column, key_type, nullptr}); + info_stack.push(Info{*key_column, key_type, false, std::nullopt, nullptr}); return true; } @@ -374,7 +447,7 @@ bool MsgPackVisitor::start_map_value() // NOLINT { auto value_column = assert_cast(info_stack.top().column).getNestedData().getColumns()[1]; auto value_type = assert_cast(*info_stack.top().type).getValueType(); - info_stack.push(Info{*value_column, value_type, nullptr}); + info_stack.push(Info{*value_column, value_type, false, std::nullopt, nullptr}); return true; } @@ -513,13 +586,26 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) case msgpack::type::object_type::ARRAY: { msgpack::object_array object_array = object.via.array; - if (object_array.size) + if (!object_array.size) + return nullptr; + + DataTypes nested_types; + nested_types.reserve(object_array.size); + bool nested_types_are_equal = true; + for (size_t i = 0; i != object_array.size; ++i) { - auto nested_type = getDataType(object_array.ptr[0]); - if (nested_type) - return std::make_shared(getDataType(object_array.ptr[0])); + auto nested_type = getDataType(object_array.ptr[i]); + if (!nested_type) + return nullptr; + + nested_types.push_back(nested_type); + nested_types_are_equal &= nested_type->equals(*nested_types[0]); } - return nullptr; + + if (nested_types_are_equal) + return std::make_shared(nested_types[0]); + + return std::make_shared(std::move(nested_types)); } case msgpack::type::object_type::MAP: { diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 5eaa3719d0c..0b485d3b97c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -25,6 +25,8 @@ public: { IColumn & column; DataTypePtr type; + bool is_tuple_element; + std::optional array_size; UInt8 * read; }; @@ -37,7 +39,7 @@ public: bool visit_bin(const char * value, size_t size); bool visit_boolean(bool value); bool start_array(size_t size); - bool end_array(); + bool end_array_item(); bool visit_nil(); bool start_map(uint32_t size); bool start_map_key(); diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index 07951d42bc6..9c601492217 100644 --- 
a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -9,12 +9,14 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -66,16 +68,19 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_uint64(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::Enum8: [[fallthrough]]; case TypeIndex::Int8: { packer.pack_int8(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::Enum16: [[fallthrough]]; case TypeIndex::Int16: { packer.pack_int16(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::Date32: [[fallthrough]]; case TypeIndex::Int32: { packer.pack_int32(assert_cast(column).getElement(row_num)); @@ -86,6 +91,30 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_int64(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::Int128: + { + packer.pack_bin(static_cast(sizeof(Int128))); + packer.pack_bin_body(column.getDataAt(row_num).data, sizeof(Int128)); + return; + } + case TypeIndex::UInt128: + { + packer.pack_bin(static_cast(sizeof(UInt128))); + packer.pack_bin_body(column.getDataAt(row_num).data, sizeof(UInt128)); + return; + } + case TypeIndex::Int256: + { + packer.pack_bin(static_cast(sizeof(Int256))); + packer.pack_bin_body(column.getDataAt(row_num).data, sizeof(Int256)); + return; + } + case TypeIndex::UInt256: + { + packer.pack_bin(static_cast(sizeof(UInt256))); + packer.pack_bin_body(column.getDataAt(row_num).data, sizeof(UInt256)); + return; + } case TypeIndex::Float32: { packer.pack_float(assert_cast(column).getElement(row_num)); @@ -101,6 +130,28 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_uint64(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::Decimal32: + { + packer.pack_int32(assert_cast &>(column).getElement(row_num)); + return; + } + case TypeIndex::Decimal64: + { + packer.pack_int64(assert_cast &>(column).getElement(row_num)); + return; + } + case TypeIndex::Decimal128: + { + packer.pack_bin(static_cast(sizeof(Decimal128))); + packer.pack_bin_body(column.getDataAt(row_num).data, sizeof(Decimal128)); + return; + } + case TypeIndex::Decimal256: + { + packer.pack_bin(static_cast(sizeof(Decimal256))); + packer.pack_bin_body(column.getDataAt(row_num).data, sizeof(Decimal256)); + return; + } case TypeIndex::String: { const std::string_view & string = assert_cast(column).getDataAt(row_num).toView(); @@ -136,7 +187,18 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr serializeField(nested_column, nested_type, offset + i); } return; - } + } + case TypeIndex::Tuple: + { + const auto & tuple_type = assert_cast(*data_type); + const auto & nested_types = tuple_type.getElements(); + const ColumnTuple & column_tuple = assert_cast(column); + const auto & nested_columns = column_tuple.getColumns(); + packer.pack_array(static_cast(nested_types.size())); + for (size_t i = 0; i < nested_types.size(); ++i) + serializeField(*nested_columns[i], nested_types[i], row_num); + return; + } case TypeIndex::Nullable: { auto nested_type = removeNullable(data_type); diff --git a/tests/queries/0_stateless/02594_msgpack_more_types.reference b/tests/queries/0_stateless/02594_msgpack_more_types.reference new file mode 100644 index 00000000000..8ccf11ccdb4 --- /dev/null +++ b/tests/queries/0_stateless/02594_msgpack_more_types.reference @@ 
-0,0 +1,2 @@ +a b 2020-01-01 42 42 42 42 42.42 42.42 42.42 42.42 +(42,'Hello') ({42:[1,2,3]},[([(1,2),(1,2)],'Hello',[1,2,3]),([],'World',[1])]) diff --git a/tests/queries/0_stateless/02594_msgpack_more_types.sh b/tests/queries/0_stateless/02594_msgpack_more_types.sh new file mode 100755 index 00000000000..bddfb5ad829 --- /dev/null +++ b/tests/queries/0_stateless/02594_msgpack_more_types.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 'a'::Enum8('a' = 1) as c1, 'b'::Enum16('b' = 1) as c2, '2020-01-01'::Date32 as c3, 42::Int128 as c4, 42::UInt128 as c5, 42::Int256 as c6, 42::UInt256 as c7, 42.42::Decimal32(2) as c8, 42.42::Decimal64(2) as c9, 42.42::Decimal128(2) as c10, 42.42::Decimal256(2) as c11 format MsgPack" | $CLICKHOUSE_LOCAL --input-format MsgPack --structure="c1 Enum8('a' = 1), c2 Enum16('b' = 1), c3 Date32, c4 Int128, c5 UInt128, c6 Int256, c7 UInt256, c8 Decimal32(2), c9 Decimal64(2), c10 Decimal128(2), c11 Decimal256(2)" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select tuple(42, 'Hello') as c1, tuple(map(42, [1, 2, 3]), [tuple([tuple(1, 2), tuple(1, 2)], 'Hello', [1, 2, 3]), tuple([], 'World', [1])]) as c2 format MsgPack" | $CLICKHOUSE_LOCAL --input-format MsgPack --structure="c1 Tuple(UInt32, String), c2 Tuple(Map(UInt32, Array(UInt32)), Array(Tuple(Array(Tuple(UInt32, UInt32)), String, Array(UInt32))))" -q "select * from table" + From 81af0b6deb0b4e477c61dab704368d5ac619258d Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 29 Mar 2023 00:29:21 +0000 Subject: [PATCH 07/18] avoid counters updates if not initialized --- src/Common/ThreadStatus.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 16ce73cda20..1b783aa9ec4 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -219,6 +219,9 @@ void ThreadStatus::updatePerformanceCounters() void ThreadStatus::updatePerformanceCountersIfNeeded() { + if (last_rusage->thread_id == 0) + return; // Performance counters are not initialized, so there is no need to update them + constexpr UInt64 performance_counters_update_period_microseconds = 10 * 1000; // 10 milliseconds UInt64 total_elapsed_microseconds = stopwatch.elapsedMicroseconds(); if (last_performance_counters_update_time + performance_counters_update_period_microseconds < total_elapsed_microseconds) From 4ea9f96b1bde2e62d834b2e8a436433414fbc53a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 29 Mar 2023 12:46:49 +0000 Subject: [PATCH 08/18] Lower number of processes in KeeperMap test --- tests/integration/test_keeper_map/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 3809f046d55..4b940fbf1d1 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -123,7 +123,7 @@ def test_create_drop_keeper_map_concurrent(started_cluster): manager = multiprocessing.Manager() stop_event = manager.Event() results = [] - for i in range(multiprocessing.cpu_count()): + for i in range(8): sleep(0.2) results.append( pool.apply_async( From 5ca488d70e17e46fc1904abb52a2ad002e2b9bec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 30 Mar 2023 09:14:52 +0300 Subject: [PATCH 09/18] Update src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp 
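A note on the wide-type encoding used by the MsgPack patches above: 128/256-bit integers and Decimal128/Decimal256 do not fit MsgPack's native integer types, so serializeField writes them as fixed-size bin payloads containing the column's raw bytes (column.getDataAt(row_num).data). A consumer therefore has to reassemble the value itself. Below is a minimal sketch of that reassembly for Int128; the helper name is made up for illustration, and it assumes the payload uses the little-endian in-memory layout of common x86_64/aarch64 builds:

#include <cstdint>
#include <iostream>

// Hypothetical decoder (not part of the patch): reassemble a signed 128-bit
// value from the 16-byte MsgPack `bin` payload produced for an Int128 column.
// Assumes the payload is the column's raw little-endian representation.
static __int128 int128FromBinPayload(const unsigned char * data)
{
    unsigned __int128 value = 0;
    for (int i = 15; i >= 0; --i)
        value = (value << 8) | data[i]; // fold bytes in from most significant to least
    return static_cast<__int128>(value);
}

int main()
{
    // 42::Int128 as written by pack_bin(16) + pack_bin_body above;
    // remaining bytes are zero-initialized.
    const unsigned char payload[16] = {42};
    std::cout << static_cast<long long>(int128FromBinPayload(payload)) << '\n'; // prints 42
    return 0;
}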
Co-authored-by: Antonio Andelic --- src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 7ce58b9991d..eeca14176cc 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -412,8 +412,10 @@ bool MsgPackVisitor::end_array_item() // NOLINT info_stack.pop(); else { - --(*info_stack.top().array_size); - if (*info_stack.top().array_size == 0) + assert(info_stack.top().array_size.has_value()); + auto & current_array_size = *info_stack.top().array_size; + --current_array_size; + if (current_array_size == 0) info_stack.pop(); } return true; From 202dc90045497212f3e8b95381144e468b041cee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 30 Mar 2023 10:01:52 +0200 Subject: [PATCH 10/18] Randomize JIT settings in tests --- tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a355c2f8e73..4d16fead44f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -558,6 +558,9 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), + "min_count_to_compile_expression": lambda: random.randint(0, 3), + "min_count_to_compile_aggregate_expression": lambda: random.randint(0, 3), + "min_count_to_compile_sort_description": lambda: random.randint(0, 3), } @staticmethod From 990ef56443b270dc11b50a0ab20e5661b9d3aa31 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 30 Mar 2023 10:55:29 +0200 Subject: [PATCH 11/18] Randomize JIT settings in tests --- tests/clickhouse-test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4d16fead44f..fa88bc19efd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -558,9 +558,9 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), - "min_count_to_compile_expression": lambda: random.randint(0, 3), - "min_count_to_compile_aggregate_expression": lambda: random.randint(0, 3), - "min_count_to_compile_sort_description": lambda: random.randint(0, 3), + "min_count_to_compile_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_aggregate_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_sort_description": lambda: random.choice([0, 3]), } @staticmethod From baabc49f339373f77395d0c8078208cf5f2616ea Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Thu, 30 Mar 2023 11:01:51 +0200 Subject: [PATCH 12/18] Update ParserKQLSort.cpp Style --- src/Parsers/Kusto/ParserKQLSort.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index f7540d729fd..ef4b84b17c7 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -27,7 +27,7 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) { String tmp(new_pos->begin, new_pos->end); - if (tmp == "desc" or tmp == "asc") + if (tmp == "desc" || tmp == "asc") has_dir = true; if (new_pos->type == TokenType::Comma) From b22d3e913620a7bd8dc805d520a94f7824910610 Mon Sep 17 00:00:00 
2001 From: Antonio Andelic Date: Thu, 30 Mar 2023 09:55:01 +0000 Subject: [PATCH 13/18] Remove wrong assert --- .../Passes/LogicalExpressionOptimizerPass.cpp | 1 - ...702_logical_optimizer_with_nulls.reference | 19 +++++++++++++++++++ .../02702_logical_optimizer_with_nulls.sql | 15 +++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 97669f3924f..13f8025f5ea 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -219,7 +219,6 @@ private: /// we can replace OR with the operand if (or_operands[0]->getResultType()->equals(*function_node.getResultType())) { - assert(!function_node.getResultType()->isNullable()); node = std::move(or_operands[0]); return; } diff --git a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference index 263329e47be..eb79bbc842a 100644 --- a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference +++ b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference @@ -56,3 +56,22 @@ QUERY id: 0 LIST id: 12, nodes: 2 COLUMN id: 9, column_name: a, result_type: Int32, source_id: 3 CONSTANT id: 13, constant_value: Tuple_(UInt64_1, UInt64_3, UInt64_2), constant_value_type: Tuple(UInt8, UInt8, UInt8) +1 test +2 test2 +3 another +QUERY id: 0 + PROJECTION COLUMNS + a Nullable(Int32) + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Nullable(Int32), source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02702_logical_optimizer_with_null_column + WHERE + FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Nullable(Int32), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_3, UInt64_2), constant_value_type: Tuple(UInt8, UInt8, UInt8) diff --git a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql index 9a49e31fe81..07d0b170a02 100644 --- a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql +++ b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql @@ -15,3 +15,18 @@ EXPLAIN QUERY TREE SELECT * FROM 02702_logical_optimizer WHERE a = 1 OR 3 = a OR SELECT * FROM 02702_logical_optimizer WHERE a = 1 OR 3 = a OR 2 = a OR a = NULL; EXPLAIN QUERY TREE SELECT * FROM 02702_logical_optimizer WHERE a = 1 OR 3 = a OR 2 = a OR a = NULL; + +DROP TABLE 02702_logical_optimizer; + +DROP TABLE IF EXISTS 02702_logical_optimizer_with_null_column; + +CREATE TABLE 02702_logical_optimizer_with_null_column +(a Nullable(Int32), b LowCardinality(String)) +ENGINE=Memory; + +INSERT INTO 02702_logical_optimizer_with_null_column VALUES (1, 'test'), (2, 'test2'), (3, 'another'); + +SELECT * FROM 02702_logical_optimizer_with_null_column WHERE a = 1 OR 3 = a OR 2 = a; +EXPLAIN QUERY TREE SELECT * FROM 02702_logical_optimizer_with_null_column WHERE a = 1 OR 3 = a OR 2 = a; + +DROP TABLE 02702_logical_optimizer_with_null_column; From 2df32324af0a3dba04fe688b5158ec6bff59cf2e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 30 Mar 2023 12:03:52 +0000 Subject: [PATCH 14/18] MySQL compatibility: Make 
str_to_date alias case-insensitive

MySQL doesn't care about the case
---
 src/Functions/parseDateTime.cpp                          | 2 +-
 tests/queries/0_stateless/02668_parse_datetime.reference | 2 ++
 tests/queries/0_stateless/02668_parse_datetime.sql       | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index 6a7a6010d4b..553e993a806 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -1856,7 +1856,7 @@ REGISTER_FUNCTION(ParseDateTime)
     factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name);
     factory.registerFunction<FunctionParseDateTimeOrZero>();
     factory.registerFunction<FunctionParseDateTimeOrNull>();
-    factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name);
+    factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::CaseInsensitive);
     factory.registerFunction<FunctionParseDateTimeInJodaSyntax>();
     factory.registerFunction<FunctionParseDateTimeInJodaSyntaxOrZero>();
diff --git a/tests/queries/0_stateless/02668_parse_datetime.reference b/tests/queries/0_stateless/02668_parse_datetime.reference
index f39655c6a41..afa3d0eb962 100644
--- a/tests/queries/0_stateless/02668_parse_datetime.reference
+++ b/tests/queries/0_stateless/02668_parse_datetime.reference
@@ -208,5 +208,7 @@ select parseDateTimeOrNull('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', '
 1
 select str_to_date('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC');
 1
+select sTr_To_DaTe('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC');
+1
 select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL;
 1
diff --git a/tests/queries/0_stateless/02668_parse_datetime.sql b/tests/queries/0_stateless/02668_parse_datetime.sql
index 757c4fe2efe..51c2fda8428 100644
--- a/tests/queries/0_stateless/02668_parse_datetime.sql
+++ b/tests/queries/0_stateless/02668_parse_datetime.sql
@@ -138,6 +138,7 @@ select parseDateTimeOrZero('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', '
 select parseDateTimeOrNull('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC');
 select parseDateTimeOrNull('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL;
 select str_to_date('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC');
+select sTr_To_DaTe('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC');
 select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL;

 -- { echoOff }

From ad246d669e85755bb6bc88169479bd6e50379afb Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 30 Mar 2023 12:08:52 +0000
Subject: [PATCH 15/18] Disable AST optimizations for projection analysis.
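For context on the FunctionFactory::CaseInsensitive flag in the hunk above: registering an alias case-insensitively means the factory normalizes the looked-up name before matching, which is why the new test's sTr_To_DaTe spelling resolves exactly like str_to_date. The following is a minimal self-contained sketch of that lookup scheme; it is illustrative only, not ClickHouse's actual FunctionFactory code:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_map>

// Sketch of a case-insensitive alias registry: aliases are stored lowercased,
// and every lookup is lowercased before matching.
static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
}

int main()
{
    std::unordered_map<std::string, std::string> case_insensitive_aliases;
    case_insensitive_aliases[toLower("str_to_date")] = "parseDateTimeOrNull";

    // Any spelling of the alias now resolves to the same target function.
    for (const std::string & name : {"str_to_date", "STR_TO_DATE", "sTr_To_DaTe"})
        std::cout << name << " -> " << case_insensitive_aliases.at(toLower(name)) << '\n';
    return 0;
}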
---
 .../Optimizations/optimizeUseAggregateProjection.cpp      | 6 +++++-
 .../25402_projection_and_ast_optimizations_bug.reference  | 1 +
 .../25402_projection_and_ast_optimizations_bug.sql        | 6 ++++++
 3 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.reference
 create mode 100644 tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.sql

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index 77b5547207c..21cb112cb14 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -61,11 +61,15 @@ static AggregateProjectionInfo getAggregatingProjectionInfo(
     /// This is a bad approach.
     /// We'd better have a separate interpreter for projections.
     /// Now it's not obvious we didn't miss anything here.
+    ///
+    /// Setting ignoreASTOptimizations is used because some of them are invalid for projections.
+    /// Example: 'SELECT min(c0), max(c0), count() GROUP BY -c0' for minmax_count projection can be rewritten to
+    /// 'SELECT min(c0), max(c0), count() GROUP BY c0' which is incorrect cause we store a column '-c0' in projection.
     InterpreterSelectQuery interpreter(
         projection.query_ast,
         context,
         Pipe(std::make_shared<SourceFromSingleChunk>(metadata_snapshot->getSampleBlock())),
-        SelectQueryOptions{QueryProcessingStage::WithMergeableState});
+        SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreASTOptimizations());

     const auto & analysis_result = interpreter.getAnalysisResult();
     const auto & query_analyzer = interpreter.getQueryAnalyzer();
diff --git a/tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.reference b/tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.reference
new file mode 100644
index 00000000000..9049324c392
--- /dev/null
+++ b/tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.reference
@@ -0,0 +1 @@
+-2.5574077246549023 0.6663667453928805 1
diff --git a/tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.sql b/tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.sql
new file mode 100644
index 00000000000..5589fbeeb9e
--- /dev/null
+++ b/tests/queries/0_stateless/25402_projection_and_ast_optimizations_bug.sql
@@ -0,0 +1,6 @@
+drop table if exists t1;
+CREATE TABLE t1 (c0 Int32) ENGINE = MergeTree() ORDER BY c0 PARTITION BY (- (c0));
+insert into t1 values(1);
+SELECT (- ((((tan (t1.c0)))+(t1.c0)))), (cos ((sin (pow(t1.c0,t1.c0))))), ((gcd((- (t1.c0)),((t1.c0)+(t1.c0))))*((- ((- (t1.c0)))))) FROM t1 GROUP BY (sqrt ((- (t1.c0)))), t1.c0, pow((erf ((- (t1.c0)))),t1.c0);
+drop table t1;
+

From d6c71533fae14635efdbda83ebe20d62226a9497 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 30 Mar 2023 08:20:20 +0000
Subject: [PATCH 16/18] Move keeper map tests to stateless
---
 tests/integration/test_keeper_map/test.py      | 110 ------------------
 ...eeper_map_concurrent_create_drop.reference  |   1 +
 ...02703_keeper_map_concurrent_create_drop.sh  |  53 +++++++++
 .../02704_keeper_map_zk_nodes.reference        |   0
 .../0_stateless/02704_keeper_map_zk_nodes.sh   |  77 ++++++++++++
 5 files changed, 131 insertions(+), 110 deletions(-)
 create mode 100644 tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference
 create mode 100755 tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh
 create mode
100644 tests/queries/0_stateless/02704_keeper_map_zk_nodes.reference create mode 100755 tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 4b940fbf1d1..c6ec7103056 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,14 +1,6 @@ -import multiprocessing import pytest -from time import sleep -import random -from itertools import count -from sys import stdout - -from multiprocessing.dummy import Pool from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry, assert_logs_contain from helpers.network import PartitionManager test_recover_staled_replica_run = 1 @@ -46,108 +38,6 @@ def remove_children(client, path): client.delete(child_path) -def test_create_keeper_map(started_cluster): - node.query( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" - ) - zk_client = get_genuine_zk() - - def assert_children_size(path, expected_size): - children_size = 0 - # 4 secs should be more than enough for replica to sync - for _ in range(10): - children_size = len(zk_client.get_children(path)) - if children_size == expected_size: - return - sleep(0.4) - assert ( - False - ), f"Invalid number of children for '{path}': actual {children_size}, expected {expected_size}" - - def assert_root_children_size(expected_size): - assert_children_size("/test_keeper_map/test1", expected_size) - - def assert_data_children_size(expected_size): - assert_children_size("/test_keeper_map/test1/data", expected_size) - - assert_root_children_size(2) - assert_data_children_size(0) - - node.query("INSERT INTO test_keeper_map VALUES (1, 11)") - assert_data_children_size(1) - - node.query( - "CREATE TABLE test_keeper_map_another (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" - ) - assert_root_children_size(2) - assert_data_children_size(1) - - node.query("INSERT INTO test_keeper_map_another VALUES (1, 11)") - assert_root_children_size(2) - assert_data_children_size(1) - - node.query("INSERT INTO test_keeper_map_another VALUES (2, 22)") - assert_root_children_size(2) - assert_data_children_size(2) - - node.query("DROP TABLE test_keeper_map SYNC") - assert_root_children_size(2) - assert_data_children_size(2) - - node.query("DROP TABLE test_keeper_map_another SYNC") - assert_root_children_size(0) - - zk_client.stop() - - -def create_drop_loop(index, stop_event): - table_name = f"test_keeper_map_{index}" - - for i in count(0, 1): - if stop_event.is_set(): - return - - node.query_with_retry( - f"CREATE TABLE IF NOT EXISTS {table_name} (key UInt64, value UInt64) ENGINE = KeeperMap('/test') PRIMARY KEY(key);" - ) - node.query_with_retry(f"INSERT INTO {table_name} VALUES ({index}, {i})") - result = node.query_with_retry( - f"SELECT value FROM {table_name} WHERE key = {index}" - ) - assert result.strip() == str(i) - node.query_with_retry(f"DROP TABLE IF EXISTS {table_name} SYNC") - - -def test_create_drop_keeper_map_concurrent(started_cluster): - pool = Pool() - manager = multiprocessing.Manager() - stop_event = manager.Event() - results = [] - for i in range(8): - sleep(0.2) - results.append( - pool.apply_async( - create_drop_loop, - args=( - i, - stop_event, - ), - ) - ) - - sleep(60) - stop_event.set() - - for result in results: - result.get() - - pool.close() - - client = get_genuine_zk() - assert len(client.get_children("/test_keeper_map/test")) == 0 - 
client.stop() - - def test_keeper_map_without_zk(started_cluster): def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh new file mode 100755 index 00000000000..3964427895c --- /dev/null +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: no-ordinary-database, zookeeper, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function create_drop_loop() +{ + table_name="02703_keeper_map_concurrent_$1" + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS $table_name" + for _ in `seq $1` + do + sleep 0.3 + done + + i=0 + while true; + do + $CLICKHOUSE_CLIENT --query="CREATE TABLE IF NOT EXISTS $table_name (key UInt64, value UInt64) ENGINE = KeeperMap('/02703_keeper_map/$CLICKHOUSE_DATABASE') PRIMARY KEY(key)" + $CLICKHOUSE_CLIENT --query="INSERT INTO $table_name VALUES ($1, $i)" + result=$($CLICKHOUSE_CLIENT --query="SELECT value FROM $table_name WHERE key = $1") + + if [ $result != $i ] + then + echo "Got invalid result $result" + exit 1 + fi + + $CLICKHOUSE_CLIENT --query="DROP TABLE $table_name" + + ((++i)) + done +} + +export -f create_drop_loop; + +THREADS=10 +TIMEOUT=30 + +for i in `seq $THREADS` +do + timeout $TIMEOUT bash -c "create_drop_loop $i" 2> /dev/null & +done + +wait + +for i in `seq $THREADS` +do + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02703_keeper_map_concurrent_$i" +done + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM system.zookeeper WHERE path = '/test_keeper_map/02703_keeper_map/$CLICKHOUSE_DATABASE'" diff --git a/tests/queries/0_stateless/02704_keeper_map_zk_nodes.reference b/tests/queries/0_stateless/02704_keeper_map_zk_nodes.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh b/tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh new file mode 100755 index 00000000000..9689d4f5a50 --- /dev/null +++ b/tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Tags: no-ordinary-database, zookeeper, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +table_name="02704_keeper_map_zk_nodes" +table_name_another="02704_keeper_map_zk_nodes_new_table" + +$CLICKHOUSE_CLIENT --multiquery --query=" +DROP TABLE IF EXISTS $table_name; +DROP TABLE IF EXISTS $table_name_another; +CREATE TABLE $table_name (key UInt64, value UInt64) +ENGINE = KeeperMap('/$table_name/$CLICKHOUSE_DATABASE') +PRIMARY KEY(key)" + +function assert_children_size() +{ + for _ in `seq 10` + do + children_size=$($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.zookeeper WHERE path = '$1'") + if [ $children_size == $2 ] + then + return + fi + + sleep 0.4 + done + + echo "Invalid number of children for path '$1': actual $children_size, expected $2" + exit 1 +} + +function assert_root_children_size() +{ + assert_children_size "/test_keeper_map/02704_keeper_map_zk_nodes/$CLICKHOUSE_DATABASE" $1 +} + +function assert_data_children_size() +{ + assert_children_size "/test_keeper_map/02704_keeper_map_zk_nodes/$CLICKHOUSE_DATABASE/data" $1 +} + +assert_root_children_size 2 +assert_data_children_size 0 + +$CLICKHOUSE_CLIENT --query="INSERT INTO $table_name VALUES (1, 11)" + +assert_data_children_size 1 + +$CLICKHOUSE_CLIENT --query=" +CREATE TABLE $table_name_another (key UInt64, value UInt64) +ENGINE = KeeperMap('/$table_name/$CLICKHOUSE_DATABASE') +PRIMARY KEY(key)" + +assert_root_children_size 2 +assert_data_children_size 1 + +$CLICKHOUSE_CLIENT --query="INSERT INTO $table_name_another VALUES (1, 11)" + +assert_root_children_size 2 +assert_data_children_size 1 + +$CLICKHOUSE_CLIENT --query="INSERT INTO $table_name_another VALUES (2, 22)" + +assert_root_children_size 2 +assert_data_children_size 2 + +$CLICKHOUSE_CLIENT --query="DROP TABLE $table_name" + +assert_root_children_size 2 +assert_data_children_size 2 + +$CLICKHOUSE_CLIENT --query="DROP TABLE $table_name_another" + +assert_root_children_size 0 From 1c58a911eb77eee5abcc1cdd78d71de9690d3323 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 30 Mar 2023 18:07:40 +0200 Subject: [PATCH 17/18] Push clickhouse-keeper as both w/ and w/o suffix `-alpine` --- .github/workflows/backport_branches.yml | 2 +- .github/workflows/master.yml | 2 +- .github/workflows/pull_request.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/release_branches.yml | 2 +- docker/keeper/Dockerfile | 3 +++ docker/keeper/Dockerfile.ubuntu | 1 + 7 files changed, 9 insertions(+), 5 deletions(-) create mode 120000 docker/keeper/Dockerfile.ubuntu diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 867cca9d037..0d81a7b303c 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -470,7 +470,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head --no-push --no-ubuntu \ + python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7c5e477ab60..b1ea1641a02 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -862,7 +862,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head \ --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head --no-ubuntu \ + python3 docker_server.py --release-type head \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2f2c263df37..ab0cbbb7ec1 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -918,7 +918,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head --no-push --no-ubuntu \ + python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 73246af6dfc..0742ebfd449 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,7 +55,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \ --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --no-ubuntu \ + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index e56a1fb58fc..1282dbef50b 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -527,7 +527,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head --no-push --no-ubuntu \ + python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper 
--image-path docker/keeper - name: Cleanup if: always() diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 34c1406b687..6496a2b2a12 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -1,3 +1,6 @@ +# The Dockerfile.ubuntu exists for the tests/ci/docker_server.py script +# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically, +# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes FROM ubuntu:20.04 AS glibc-donor ARG TARGETARCH diff --git a/docker/keeper/Dockerfile.ubuntu b/docker/keeper/Dockerfile.ubuntu new file mode 120000 index 00000000000..1d1fe94df49 --- /dev/null +++ b/docker/keeper/Dockerfile.ubuntu @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file From 37213aa6b49b773a81e4810f8d65d7a9210f4e1f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 30 Mar 2023 14:45:01 -0400 Subject: [PATCH 18/18] edits --- CHANGELOG.md | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5decadf8f8c..47320208f02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,60 +18,60 @@ * Do not allow const and non-deterministic secondary indices [#46839](https://github.com/ClickHouse/ClickHouse/pull/46839) ([Anton Popov](https://github.com/CurtizJ)). #### New Feature -* Add new mode for splitting the work on replicas using settings `parallel_replicas_custom_key` and `parallel_replicas_custom_key_filter_type`. If the cluster consists of a single shard with multiple replicas, up to `max_parallel_replicas` will be randomly picked and turned into shards. For each shard, a corresponding filter is added to the query on the initiator before being sent to the shard. If the cluster consists of multiple shards, it will behave the same as `sample_key` but with the possibility to define an arbitrary key. [#45108](https://github.com/ClickHouse/ClickHouse/pull/45108) ([Antonio Andelic](https://github.com/antonio2368)). +* Add a new mode for splitting the work on replicas using settings `parallel_replicas_custom_key` and `parallel_replicas_custom_key_filter_type`. If the cluster consists of a single shard with multiple replicas, up to `max_parallel_replicas` will be randomly picked and turned into shards. For each shard, a corresponding filter is added to the query on the initiator before being sent to the shard. If the cluster consists of multiple shards, it will behave the same as `sample_key` but with the possibility to define an arbitrary key. [#45108](https://github.com/ClickHouse/ClickHouse/pull/45108) ([Antonio Andelic](https://github.com/antonio2368)). * An option to display partial result on cancel: Added query setting `partial_result_on_first_cancel` allowing the canceled query (e.g. due to Ctrl-C) to return a partial result. [#45689](https://github.com/ClickHouse/ClickHouse/pull/45689) ([Alexey Perevyshin](https://github.com/alexX512)). * Added support of arbitrary tables engines for temporary tables (except for Replicated and KeeperMap engines). Close [#31497](https://github.com/ClickHouse/ClickHouse/issues/31497). [#46071](https://github.com/ClickHouse/ClickHouse/pull/46071) ([Roman Vasin](https://github.com/rvasin)). -* Add support for replication of user-defined SQL functions using a centralized storage in Keeper. [#46085](https://github.com/ClickHouse/ClickHouse/pull/46085) ([Aleksei Filatov](https://github.com/aalexfvk)). 
+* Add support for replication of user-defined SQL functions using centralized storage in Keeper. [#46085](https://github.com/ClickHouse/ClickHouse/pull/46085) ([Aleksei Filatov](https://github.com/aalexfvk)). * Implement `system.server_settings` (similar to `system.settings`), which will contain server configurations. [#46550](https://github.com/ClickHouse/ClickHouse/pull/46550) ([pufit](https://github.com/pufit)). * Support for `UNDROP TABLE` query. Closes [#46811](https://github.com/ClickHouse/ClickHouse/issues/46811). [#47241](https://github.com/ClickHouse/ClickHouse/pull/47241) ([chen](https://github.com/xiedeyantu)). -* Allow separate grants for named collections (e.g. to be able to give `SHOW/CREATE/ALTER/DROP named collection` access only to certain collections, instead of all at once). Closes [#40894](https://github.com/ClickHouse/ClickHouse/issues/40894). Add new access type `NAMED_COLLECTION_CONTROL` which is not given to default user unless explicitly added to user config (is required to be able to do `GRANT ALL`), also `show_named_collections` is no longer obligatory to be manually specified for default user to be able to have full access rights as was in 23.2. [#46241](https://github.com/ClickHouse/ClickHouse/pull/46241) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow separate grants for named collections (e.g. to be able to give `SHOW/CREATE/ALTER/DROP named collection` access only to certain collections, instead of all at once). Closes [#40894](https://github.com/ClickHouse/ClickHouse/issues/40894). Add new access type `NAMED_COLLECTION_CONTROL` which is not given to user default unless explicitly added to the user config (is required to be able to do `GRANT ALL`), also `show_named_collections` is no longer obligatory to be manually specified for user default to be able to have full access rights as was in 23.2. [#46241](https://github.com/ClickHouse/ClickHouse/pull/46241) ([Kseniia Sumarokova](https://github.com/kssenii)). * Allow nested custom disks. Previously custom disks supported only flat disk structure. [#47106](https://github.com/ClickHouse/ClickHouse/pull/47106) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Intruduce a function `widthBucket` (with a `WIDTH_BUCKET` alias for compatibility). [#42974](https://github.com/ClickHouse/ClickHouse/issues/42974). [#46790](https://github.com/ClickHouse/ClickHouse/pull/46790) ([avoiderboi](https://github.com/avoiderboi)). -* Add new function `parseDateTime`/`parseDateTimeInJodaSyntax` according to specified format string. parseDateTime parses string to datetime in MySQL syntax, parseDateTimeInJodaSyntax parses in Joda syntax. [#46815](https://github.com/ClickHouse/ClickHouse/pull/46815) ([李扬](https://github.com/taiyang-li)). -* Use `dummy UInt8` for default structure of table function `null`. Closes [#46930](https://github.com/ClickHouse/ClickHouse/issues/46930). [#47006](https://github.com/ClickHouse/ClickHouse/pull/47006) ([flynn](https://github.com/ucasfl)). +* Introduce a function `widthBucket` (with a `WIDTH_BUCKET` alias for compatibility). [#42974](https://github.com/ClickHouse/ClickHouse/issues/42974). [#46790](https://github.com/ClickHouse/ClickHouse/pull/46790) ([avoiderboi](https://github.com/avoiderboi)). +* Add new function `parseDateTime`/`parseDateTimeInJodaSyntax` according to the specified format string. parseDateTime parses String to DateTime in MySQL syntax, parseDateTimeInJodaSyntax parses in Joda syntax. 
[#46815](https://github.com/ClickHouse/ClickHouse/pull/46815) ([李扬](https://github.com/taiyang-li)). +* Use `dummy UInt8` for the default structure of table function `null`. Closes [#46930](https://github.com/ClickHouse/ClickHouse/issues/46930). [#47006](https://github.com/ClickHouse/ClickHouse/pull/47006) ([flynn](https://github.com/ucasfl)). * Support for date format with a comma, like `Dec 15, 2021` in the `parseDateTimeBestEffort` function. Closes [#46816](https://github.com/ClickHouse/ClickHouse/issues/46816). [#47071](https://github.com/ClickHouse/ClickHouse/pull/47071) ([chen](https://github.com/xiedeyantu)). -* Add settings `http_wait_end_of_query` and `http_response_buffer_size` that corresponds to URL params `wait_end_of_query` and `buffer_size` for HTTP interface. This allows to change these settings in the profiles. [#47108](https://github.com/ClickHouse/ClickHouse/pull/47108) ([Vladimir C](https://github.com/vdimir)). +* Add settings `http_wait_end_of_query` and `http_response_buffer_size` that corresponds to URL params `wait_end_of_query` and `buffer_size` for the HTTP interface. This allows changing these settings in the profiles. [#47108](https://github.com/ClickHouse/ClickHouse/pull/47108) ([Vladimir C](https://github.com/vdimir)). * Add `system.dropped_tables` table that shows tables that were dropped from `Atomic` databases but were not completely removed yet. [#47364](https://github.com/ClickHouse/ClickHouse/pull/47364) ([chen](https://github.com/xiedeyantu)). * Add `INSTR` as alias of `positionCaseInsensitive` for MySQL compatibility. Closes [#47529](https://github.com/ClickHouse/ClickHouse/issues/47529). [#47535](https://github.com/ClickHouse/ClickHouse/pull/47535) ([flynn](https://github.com/ucasfl)). * Added `toDecimalString` function allowing to convert numbers to string with fixed precision. [#47838](https://github.com/ClickHouse/ClickHouse/pull/47838) ([Andrey Zvonov](https://github.com/zvonand)). * Add a merge tree setting `max_number_of_mutations_for_replica`. It limits the number of part mutations per replica to the specified amount. Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings). [#48047](https://github.com/ClickHouse/ClickHouse/pull/48047) ([Vladimir C](https://github.com/vdimir)). -* Add Map-related function `mapFromArrays`, which allows us to create map from a pair of arrays. [#31125](https://github.com/ClickHouse/ClickHouse/pull/31125) ([李扬](https://github.com/taiyang-li)). -* Allow control compression in Parquet/ORC/Arrow output formats, support more compression for input formats. This closes [#13541](https://github.com/ClickHouse/ClickHouse/issues/13541). [#47114](https://github.com/ClickHouse/ClickHouse/pull/47114) ([Kruglov Pavel](https://github.com/Avogar)). +* Add the Map-related function `mapFromArrays`, which allows the creation of a map from a pair of arrays. [#31125](https://github.com/ClickHouse/ClickHouse/pull/31125) ([李扬](https://github.com/taiyang-li)). +* Allow control of compression in Parquet/ORC/Arrow output formats, adds support for more compression input formats. This closes [#13541](https://github.com/ClickHouse/ClickHouse/issues/13541). [#47114](https://github.com/ClickHouse/ClickHouse/pull/47114) ([Kruglov Pavel](https://github.com/Avogar)). * Add SSL User Certificate authentication to the native protocol. Closes [#47077](https://github.com/ClickHouse/ClickHouse/issues/47077). 
[#47596](https://github.com/ClickHouse/ClickHouse/pull/47596) ([Nikolay Degterinsky](https://github.com/evillique)). * Add *OrNull() and *OrZero() variants for `parseDateTime`, add alias `str_to_date` for MySQL parity. [#48000](https://github.com/ClickHouse/ClickHouse/pull/48000) ([Robert Schulze](https://github.com/rschu1ze)). * Added operator `REGEXP` (similar to operators "LIKE", "IN", "MOD" etc.) for better compatibility with MySQL [#47869](https://github.com/ClickHouse/ClickHouse/pull/47869) ([Robert Schulze](https://github.com/rschu1ze)). #### Performance Improvement * Marks in memory are now compressed, using 3-6x less memory. [#47290](https://github.com/ClickHouse/ClickHouse/pull/47290) ([Michael Kolupaev](https://github.com/al13n321)). -* Backups for large numbers of files were unbelievably slow in previous versions. Not anymore. Now they are unbelievably fast. [#47251](https://github.com/ClickHouse/ClickHouse/pull/47251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Introduced a separate thread pool for backup's IO operations. This will allow to scale it independently of other pools and increase performance. [#47174](https://github.com/ClickHouse/ClickHouse/pull/47174) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Use MultiRead request and retries for collecting metadata at final stage of backup processing. [#47243](https://github.com/ClickHouse/ClickHouse/pull/47243) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). If a backup and restoring data are both in S3 then server-side copy should be used from now on. [#47546](https://github.com/ClickHouse/ClickHouse/pull/47546) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backups for large numbers of files were unbelievably slow in previous versions. Not anymore. Now they are unbelievably fast. [#47251](https://github.com/ClickHouse/ClickHouse/pull/47251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Introduced a separate thread pool for backup's IO operations. This will allow scaling it independently of other pools and increase performance. [#47174](https://github.com/ClickHouse/ClickHouse/pull/47174) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Use MultiRead request and retries for collecting metadata at the final stage of backup processing. [#47243](https://github.com/ClickHouse/ClickHouse/pull/47243) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). If a backup and restoring data are both in S3 then server-side copy should be used from now on. [#47546](https://github.com/ClickHouse/ClickHouse/pull/47546) ([Vitaly Baranov](https://github.com/vitlibar)). * Fixed excessive reading in queries with `FINAL`. [#47801](https://github.com/ClickHouse/ClickHouse/pull/47801) ([Nikita Taranov](https://github.com/nickitat)). -* Setting `max_final_threads` would be set to number of cores at server startup (by the same algorithm as we use for `max_threads`). This improves concurrency of `final` execution on servers with high number of CPUs. [#47915](https://github.com/ClickHouse/ClickHouse/pull/47915) ([Nikita Taranov](https://github.com/nickitat)). +* Setting `max_final_threads` would be set to the number of cores at server startup (by the same algorithm as used for `max_threads`). This improves the concurrency of `final` execution on servers with high number of CPUs. [#47915](https://github.com/ClickHouse/ClickHouse/pull/47915) ([Nikita Taranov](https://github.com/nickitat)). 
* Allow executing reading pipeline for DIRECT dictionary with CLICKHOUSE source in multiple threads. To enable set `dictionary_use_async_executor=1` in `SETTINGS` section for source in `CREATE DICTIONARY` statement. [#47986](https://github.com/ClickHouse/ClickHouse/pull/47986) ([Vladimir C](https://github.com/vdimir)). * Optimize one nullable key aggregate performance. [#45772](https://github.com/ClickHouse/ClickHouse/pull/45772) ([LiuNeng](https://github.com/liuneng1994)). * Implemented lowercase `tokenbf_v1` index utilization for `hasTokenOrNull`, `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull`. [#46252](https://github.com/ClickHouse/ClickHouse/pull/46252) ([ltrk2](https://github.com/ltrk2)). * Optimize functions `position` and `LIKE` by searching the first two chars using SIMD. [#46289](https://github.com/ClickHouse/ClickHouse/pull/46289) ([Jiebin Sun](https://github.com/jiebinn)). -* Optimize queries from the `system.detached_parts`, which could be significantly large. Added several sources with respect to the block size limitation; in each block an IO thread pool is used to calculate the part size, i.e. to make syscalls in parallel. [#46624](https://github.com/ClickHouse/ClickHouse/pull/46624) ([Sema Checherinda](https://github.com/CheSema)). +* Optimize queries from the `system.detached_parts`, which could be significantly large. Added several sources with respect to the block size limitation; in each block, an IO thread pool is used to calculate the part size, i.e. to make syscalls in parallel. [#46624](https://github.com/ClickHouse/ClickHouse/pull/46624) ([Sema Checherinda](https://github.com/CheSema)). * Increase the default value of `max_replicated_merges_in_queue` for ReplicatedMergeTree tables from 16 to 1000. It allows faster background merge operation on clusters with a very large number of replicas, such as clusters with shared storage in ClickHouse Cloud. [#47050](https://github.com/ClickHouse/ClickHouse/pull/47050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Updated `clickhouse-copier` to use `GROUP BY` instead of `DISTINCT` to get list of partitions. For large tables this reduced the select time from over 500s to under 1s. [#47386](https://github.com/ClickHouse/ClickHouse/pull/47386) ([Clayton McClure](https://github.com/cmcclure-twilio)). +* Updated `clickhouse-copier` to use `GROUP BY` instead of `DISTINCT` to get the list of partitions. For large tables, this reduced the select time from over 500s to under 1s. [#47386](https://github.com/ClickHouse/ClickHouse/pull/47386) ([Clayton McClure](https://github.com/cmcclure-twilio)). * Fix performance degradation in `ASOF JOIN`. [#47544](https://github.com/ClickHouse/ClickHouse/pull/47544) ([Ongkong](https://github.com/ongkong)). -* Even more batching in Keeper. Avoid breaking batches on read requests to improve performance. [#47978](https://github.com/ClickHouse/ClickHouse/pull/47978) ([Antonio Andelic](https://github.com/antonio2368)). -* Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). +* Even more batching in Keeper. Improve performance by avoiding breaking batches on read requests. [#47978](https://github.com/ClickHouse/ClickHouse/pull/47978) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow PREWHERE for Merge with different DEFAULT expressions for columns. 
[#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). #### Experimental Feature -* Parallel replicas: Improved the overall performance by better utilizing local replica. And forbid reading with parallel replicas from non-replicated MergeTree by default. [#47858](https://github.com/ClickHouse/ClickHouse/pull/47858) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Parallel replicas: Improved the overall performance by better utilizing the local replica, and forbid the reading with parallel replicas from non-replicated MergeTree by default. [#47858](https://github.com/ClickHouse/ClickHouse/pull/47858) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Support filter push down to left table for JOIN with `Join`, `Dictionary` and `EmbeddedRocksDB` tables if the experimental Analyzer is enabled. [#47280](https://github.com/ClickHouse/ClickHouse/pull/47280) ([Maksim Kita](https://github.com/kitaisreal)). * Now ReplicatedMergeTree with zero copy replication has less load to Keeper. [#47676](https://github.com/ClickHouse/ClickHouse/pull/47676) ([alesapin](https://github.com/alesapin)). * Fix create materialized view with MaterializedPostgreSQL [#40807](https://github.com/ClickHouse/ClickHouse/pull/40807) ([Maksim Buren](https://github.com/maks-buren630501)). #### Improvement * Enable `input_format_json_ignore_unknown_keys_in_named_tuple` by default. [#46742](https://github.com/ClickHouse/ClickHouse/pull/46742) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to ignore errors while pushing to MATERIALIZED VIEW (add new setting `materialized_views_ignore_errors`, by default to `false`, but it is set to `true` for flushing logs to `system.*_log` tables unconditionally). [#46658](https://github.com/ClickHouse/ClickHouse/pull/46658) ([Azat Khuzhin](https://github.com/azat)). +* Allow errors to be ignored while pushing to MATERIALIZED VIEW (add new setting `materialized_views_ignore_errors`, by default to `false`, but it is set to `true` for flushing logs to `system.*_log` tables unconditionally). [#46658](https://github.com/ClickHouse/ClickHouse/pull/46658) ([Azat Khuzhin](https://github.com/azat)). * Track the file queue of distributed sends in memory. [#45491](https://github.com/ClickHouse/ClickHouse/pull/45491) ([Azat Khuzhin](https://github.com/azat)). -* Now `X-ClickHouse-Query-Id` and `X-ClickHouse-Timezone` headers are added to response in all queries via http protocol. Previously it was done only for `SELECT` queries. [#46364](https://github.com/ClickHouse/ClickHouse/pull/46364) ([Anton Popov](https://github.com/CurtizJ)). +* Now `X-ClickHouse-Query-Id` and `X-ClickHouse-Timezone` headers are added to responses in all queries via HTTP protocol. Previously it was done only for `SELECT` queries. [#46364](https://github.com/ClickHouse/ClickHouse/pull/46364) ([Anton Popov](https://github.com/CurtizJ)). * External tables from `MongoDB`: support for connection to a replica set via a URI with a host:port enum and support for the readPreference option in MongoDB dictionaries. Example URI: mongodb://db0.example.com:27017,db1.example.com:27017,db2.example.com:27017/?replicaSet=myRepl&readPreference=primary. [#46524](https://github.com/ClickHouse/ClickHouse/pull/46524) ([artem-yadr](https://github.com/artem-yadr)). * This improvement should be invisible for users. Re-implement projection analysis on top of query plan. Added setting `query_plan_optimize_projection=1` to switch between old and new version. 
Fixes [#44963](https://github.com/ClickHouse/ClickHouse/issues/44963). [#46537](https://github.com/ClickHouse/ClickHouse/pull/46537) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Use parquet format v2 instead of v1 in output format by default. Add setting `output_format_parquet_version` to control parquet version, possible values `1.0`, `2.4`, `2.6`, `2.latest` (default). [#46617](https://github.com/ClickHouse/ClickHouse/pull/46617) ([Kruglov Pavel](https://github.com/Avogar)). -* It is now possible using new configuration syntax to configure Kafka topics with periods (`.`) in their name. [#46752](https://github.com/ClickHouse/ClickHouse/pull/46752) ([Robert Schulze](https://github.com/rschu1ze)). +* Use Parquet format v2 instead of v1 in output format by default. Add setting `output_format_parquet_version` to control parquet version, possible values `1.0`, `2.4`, `2.6`, `2.latest` (default). [#46617](https://github.com/ClickHouse/ClickHouse/pull/46617) ([Kruglov Pavel](https://github.com/Avogar)). +* It is now possible to use the new configuration syntax to configure Kafka topics with periods (`.`) in their name. [#46752](https://github.com/ClickHouse/ClickHouse/pull/46752) ([Robert Schulze](https://github.com/rschu1ze)). * Fix heuristics that check hyperscan patterns for problematic repeats. [#46819](https://github.com/ClickHouse/ClickHouse/pull/46819) ([Robert Schulze](https://github.com/rschu1ze)). * Don't report ZK node exists to system.errors when a block was created concurrently by a different replica. [#46820](https://github.com/ClickHouse/ClickHouse/pull/46820) ([Raúl Marín](https://github.com/Algunenano)). * Increase the limit for opened files in `clickhouse-local`. It will be able to read from `web` tables on servers with a huge number of CPU cores. Do not back off reading from the URL table engine in case of too many opened files. This closes [#46852](https://github.com/ClickHouse/ClickHouse/issues/46852). [#46853](https://github.com/ClickHouse/ClickHouse/pull/46853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -79,7 +79,7 @@ * Added update `system.backups` after every processed task to track the progress of backups. [#46989](https://github.com/ClickHouse/ClickHouse/pull/46989) ([Aleksandr Musorin](https://github.com/AVMusorin)). * Allow types conversion in Native input format. Add settings `input_format_native_allow_types_conversion` that controls it (enabled by default). [#46990](https://github.com/ClickHouse/ClickHouse/pull/46990) ([Kruglov Pavel](https://github.com/Avogar)). * Allow IPv4 in the `range` function to generate IP ranges. [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Improve exception message when it's impossible to make part move from one volume/disk to another. [#47032](https://github.com/ClickHouse/ClickHouse/pull/47032) ([alesapin](https://github.com/alesapin)). +* Improve exception message when it's impossible to move a part from one volume/disk to another. [#47032](https://github.com/ClickHouse/ClickHouse/pull/47032) ([alesapin](https://github.com/alesapin)). * Support `Bool` type in `JSONType` function. Previously `Null` type was mistakenly returned for bool values. [#47046](https://github.com/ClickHouse/ClickHouse/pull/47046) ([Anton Popov](https://github.com/CurtizJ)). * Use `_request_body` parameter to configure predefined HTTP queries. 
[#47086](https://github.com/ClickHouse/ClickHouse/pull/47086) ([Constantine Peresypkin](https://github.com/pkit)). * Automatic indentation in the built-in UI SQL editor when Enter is pressed. [#47113](https://github.com/ClickHouse/ClickHouse/pull/47113) ([Alexey Korepanov](https://github.com/alexkorep)). @@ -87,27 +87,27 @@ * Previously, the `repeat` function's second argument only accepted an unsigned integer type, which meant it could not accept values such as -1. This behavior differed from that of the Spark function. In this update, the repeat function has been modified to match the behavior of the Spark function. It now accepts the same types of inputs, including negative integers. Extensive testing has been performed to verify the correctness of the updated implementation. [#47134](https://github.com/ClickHouse/ClickHouse/pull/47134) ([KevinyhZou](https://github.com/KevinyhZou)). Note: the changelog entry was rewritten by ChatGPT. * Remove `::__1` part from stacktraces. Display `std::basic_string