From d7d90781839a49b76f1692d1badbaa54e3dab5a5 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Fri, 13 Mar 2020 20:19:28 +0300 Subject: [PATCH 001/115] fix build with dynamic libs --- contrib/CMakeLists.txt | 5 +++-- dbms/src/Storages/System/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index cf5fc9e81ff..bc9c2528fb0 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -310,12 +310,13 @@ if (USE_BASE64) endif() if (USE_INTERNAL_HYPERSCAN_LIBRARY) - add_subdirectory (hyperscan) - # The library is large - avoid bloat. if (USE_STATIC_LIBRARIES) + add_subdirectory (hyperscan) target_compile_options (hs PRIVATE -g0) else () + set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") + add_subdirectory (hyperscan) target_compile_options (hs_shared PRIVATE -g0) endif () endif() diff --git a/dbms/src/Storages/System/CMakeLists.txt b/dbms/src/Storages/System/CMakeLists.txt index 18c452caf7b..0b1a35dc261 100644 --- a/dbms/src/Storages/System/CMakeLists.txt +++ b/dbms/src/Storages/System/CMakeLists.txt @@ -12,7 +12,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(storages_system .) list (APPEND storages_system_sources ${CONFIG_BUILD}) add_library(clickhouse_storages_system ${storages_system_headers} ${storages_system_sources}) -target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper clickhouse_parsers) +target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper clickhouse_parsers ${JEMALLOC_LIBRARIES}) add_custom_target(generate-contributors ./StorageSystemContributors.sh SOURCES StorageSystemContributors.sh WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} # BYPRODUCTS StorageSystemContributors.generated.cpp From 6969191c9fcdaf9eb63d0869dcfdc102a4976268 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Feb 2020 10:01:11 +0300 Subject: [PATCH 002/115] Call onException if ParallelInputsHandler::onFinish* throws --- dbms/src/DataStreams/ParallelInputsProcessor.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 505bfac567c..a786dac7497 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -206,6 +206,8 @@ private: } loop(thread_num); + + handler.onFinishThread(thread_num); } catch (...) { @@ -217,8 +219,6 @@ private: handler.onException(exception, thread_num); } - handler.onFinishThread(thread_num); - /// The last thread on the output indicates that there is no more data. if (0 == --active_threads) { @@ -242,7 +242,19 @@ private: } } - handler.onFinish(); /// TODO If in `onFinish` or `onFinishThread` there is an exception, then std::terminate is called. + try + { + handler.onFinish(); + } + catch (...) 
+ { + exception = std::current_exception(); + } + + if (exception) + { + handler.onException(exception, thread_num); + } } } From a15b2daf6d08d0805ed4b715ad8ef48833ce01be Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Feb 2020 23:15:29 +0300 Subject: [PATCH 003/115] Do not shutdown global thread pool on exception Otherwise GlobalThreadPool can be terminated (for example due to an exception from the ParallelInputsHandler::onFinish/onFinishThread, from ParallelAggregatingBlockInputStream::Handler::onFinish/onFinishThread, since writeToTemporaryFile() can definitelly throw) and the server will not accept new connections (or/and execute queries) anymore. Here is possible stacktrace (it is a bit inaccurate, due to optimizations I guess, and it had been obtained with the DB::tryLogCurrentException() in the catch block of the ThreadPoolImpl::worker()): 2020.02.16 22:30:40.415246 [ 45909 ] {} ThreadPool: Unhandled exception in the ThreadPool(10000,1000,10000) the loop will be shutted down: Code: 241, e.displayText() = DB::Exception: Memory limit (total) exceeded: would use 279.40 GiB (attempt to allocate chunk of 4205536 bytes), maximum: 279.40 GiB, Stack trace (when copying this message, always include the lines below): 1. Common/Exception.cpp:35: DB::Exception::Exception(...) ... 6. Common/Allocator.h:102: void DB::PODArrayBase<8ul, 4096ul, Allocator, 15ul, 16ul>::reserve<>(unsigned long) (.part.0) 7. Interpreters/Aggregator.cpp:1040: void DB::Aggregator::writeToTemporaryFileImpl<...>(...) 8. Interpreters/Aggregator.cpp:719: DB::Aggregator::writeToTemporaryFile(...) 9. include/memory:4206: DB::Aggregator::writeToTemporaryFile(...) 10. DataStreams/ParallelInputsProcessor.h:223: DB::ParallelInputsProcessor::thread(...) Refs: https://github.com/ClickHouse/ClickHouse/issues/6833#issuecomment-579221732 (Reference to particular comment, since I'm not sure about the initial issue) --- dbms/src/Common/ThreadPool.cpp | 19 ++++++++++++++++--- dbms/src/Common/ThreadPool.h | 5 +++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index c1cad465ed2..47d37d572df 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -28,8 +28,11 @@ ThreadPoolImpl::ThreadPoolImpl(size_t max_threads_) } template -ThreadPoolImpl::ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_) - : max_threads(max_threads_), max_free_threads(max_free_threads_), queue_size(queue_size_) +ThreadPoolImpl::ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_, bool shutdown_on_exception_) + : max_threads(max_threads_) + , max_free_threads(max_free_threads_) + , queue_size(queue_size_) + , shutdown_on_exception(shutdown_on_exception_) { } @@ -226,9 +229,19 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ std::unique_lock lock(mutex); if (!first_exception) first_exception = std::current_exception(); - shutdown = true; + if (shutdown_on_exception) + shutdown = true; --scheduled_jobs; } + + DB::tryLogCurrentException("ThreadPool", + std::string("Exception in the ThreadPool(") + + std::to_string(max_threads) + ", " + + std::to_string(max_free_threads) + ", " + + std::to_string(queue_size) + ", " + + std::to_string(shutdown_on_exception) + + ")."); + job_finished.notify_all(); new_job_or_shutdown.notify_all(); return; diff --git a/dbms/src/Common/ThreadPool.h b/dbms/src/Common/ThreadPool.h index 662d34afadd..1f538167c8a 100644 --- a/dbms/src/Common/ThreadPool.h 
+++ b/dbms/src/Common/ThreadPool.h @@ -33,7 +33,7 @@ public: explicit ThreadPoolImpl(size_t max_threads_); /// queue_size - maximum number of running plus scheduled jobs. It can be greater than max_threads. Zero means unlimited. - ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_); + ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_, bool shutdown_on_exception_ = true); /// Add new job. Locks until number of scheduled jobs is less than maximum or exception in one of threads was thrown. /// If any thread was throw an exception, first exception will be rethrown from this method, @@ -79,6 +79,7 @@ private: size_t scheduled_jobs = 0; bool shutdown = false; + const bool shutdown_on_exception = true; struct JobWithPriority { @@ -128,7 +129,7 @@ using FreeThreadPool = ThreadPoolImpl; class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable { public: - GlobalThreadPool() : FreeThreadPool(10000, 1000, 10000) {} + GlobalThreadPool() : FreeThreadPool(10000, 1000, 10000, false) {} static GlobalThreadPool & instance(); }; From 4547e1a25bf2c44150e46e2cecebf73c4a92bf55 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Mar 2020 14:54:07 +0300 Subject: [PATCH 004/115] Simplify kill mutation test --- .../0_stateless/00834_kill_mutation.reference | 4 ++-- .../queries/0_stateless/00834_kill_mutation.sh | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation.reference b/dbms/tests/queries/0_stateless/00834_kill_mutation.reference index 577cf2d4e04..cbee44069d8 100644 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation.reference +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation.reference @@ -1,7 +1,7 @@ *** Create and kill a single invalid mutation *** -mutation_3.txt 1 1 Code: 6, +1 waiting test kill_mutation mutation_3.txt *** Create and kill invalid mutation that blocks another mutation *** -mutation_4.txt 1 1 Code: 6, +1 waiting test kill_mutation mutation_4.txt 2001-01-01 2 b diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh index b27ef779416..f1be4d9e7e5 100755 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh @@ -17,17 +17,17 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill a single invalid mutat ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE toUInt32(s) = 1 SETTINGS mutations_sync = 1" & -check_query1="SELECT substr(latest_fail_reason, 1, 8) as ErrorCode FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND ErrorCode != ''" +check_query1="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND is_done = 0" query_result=`$CLICKHOUSE_CLIENT --query="$check_query1" 2>&1` -while [ -z "$query_result" ] +while [ "$query_result" == "0" ] do query_result=`$CLICKHOUSE_CLIENT --query="$check_query1" 2>&1` - sleep 0.1 + sleep 0.5 done -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation'" +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' and is_done = 0" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation'" 
@@ -41,17 +41,17 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill invalid mutation that ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE toUInt32(s) = 1" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE x = 1 SETTINGS mutations_sync = 1" & -check_query2="SELECT substr(latest_fail_reason, 1, 8) as ErrorCode FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt' AND ErrorCode != ''" +check_query2="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" query_result=`$CLICKHOUSE_CLIENT --query="$check_query1" 2>&1` -while [ -z "$query_result" ] +while [ "$query_result" == "0" ] do query_result=`$CLICKHOUSE_CLIENT --query="$check_query1" 2>&1` - sleep 0.1 + sleep 0.5 done -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" From 32b3e7946cb1f579bc6be96bea04dcac49281f51 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 16 Mar 2020 15:32:07 +0300 Subject: [PATCH 005/115] Fix flacky test, take two --- .../ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp index b74c07bca46..a07c1ae8983 100644 --- a/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp +++ b/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp @@ -131,7 +131,7 @@ TEST(zkutil, MultiAsync) /// The test is quite heavy. It is normal if session is expired during this test. /// If we don't check that, the test will be flacky. - if (e.code != Coordination::ZSESSIONEXPIRED) + if (e.code != Coordination::ZSESSIONEXPIRED && e.code != Coordination::ZCONNECTIONLOSS) throw; } } From 211ee95eb72582aa337ad7630f1802a277b32ad1 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 16 Mar 2020 16:30:37 +0300 Subject: [PATCH 006/115] undo jemalloc deps --- dbms/src/Storages/System/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/System/CMakeLists.txt b/dbms/src/Storages/System/CMakeLists.txt index 0b1a35dc261..18c452caf7b 100644 --- a/dbms/src/Storages/System/CMakeLists.txt +++ b/dbms/src/Storages/System/CMakeLists.txt @@ -12,7 +12,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(storages_system .) 
list (APPEND storages_system_sources ${CONFIG_BUILD}) add_library(clickhouse_storages_system ${storages_system_headers} ${storages_system_sources}) -target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper clickhouse_parsers ${JEMALLOC_LIBRARIES}) +target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper clickhouse_parsers) add_custom_target(generate-contributors ./StorageSystemContributors.sh SOURCES StorageSystemContributors.sh WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} # BYPRODUCTS StorageSystemContributors.generated.cpp From 718903f14069d7c7fab66983a34c8e3ac7861a68 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Mar 2020 17:05:11 +0300 Subject: [PATCH 007/115] Simplify test and fix order of messages --- .../0_stateless/00834_kill_mutation.sh | 8 +++++-- ...ll_mutation_replicated_zookeeper.reference | 6 ++--- ...0834_kill_mutation_replicated_zookeeper.sh | 24 +++++++++++-------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh index f1be4d9e7e5..a00e52fc23c 100755 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh @@ -29,10 +29,12 @@ done ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' and is_done = 0" -${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation'" +kill_message=$(${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation'") wait +echo "$kill_message" + ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation'" @@ -54,10 +56,12 @@ done ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" -${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" +kill_message=$(${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'") wait +echo "$kill_message" + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.kill_mutation" # must always be empty ${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.mutations WHERE table = 'kill_mutation' AND database = 'test' AND is_done = 0" diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.reference index 3fe9a065099..a997ebe1dc9 100644 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.reference @@ -1,9 +1,9 @@ *** Create and kill a single invalid mutation *** -0000000000 1 1 Code: 6, -waiting test kill_mutation_r1 0000000000 +1 Mutation 0000000000 was killed +waiting test kill_mutation_r1 0000000000 0 *** Create and kill invalid mutation that blocks another mutation *** -0000000001 1 1 Code: 6, +1 waiting test kill_mutation_r1 0000000001 2001-01-01 2 b diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh index ec7c48147c4..2aea2e7cfb0 100755 --- 
a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh @@ -20,22 +20,24 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill a single invalid mutat # wrong mutation ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE toUInt32(s) = 1 SETTINGS mutations_sync=2" 2>&1 | grep -o "Mutation 0000000000 was killed" & -check_query1="SELECT substr(latest_fail_reason, 1, 8) as ErrorCode FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND ErrorCode != ''" +check_query1="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND is_done = 0" query_result=`$CLICKHOUSE_CLIENT --query="$check_query1" 2>&1` -while [ -z "$query_result" ] +while [ "$query_result" == "0" ] do query_result=`$CLICKHOUSE_CLIENT --query="$check_query1" 2>&1` - sleep 0.1 + sleep 0.5 done -$CLICKHOUSE_CLIENT --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0', '20010101_0_0_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1'" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND is_done = 0" -${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1'" +kill_message=$(${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1'") wait +echo "$kill_message" + # No active mutations exists ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1'" @@ -52,22 +54,24 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE toU # good mutation, but blocked with wrong mutation ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE x = 1 SETTINGS mutations_sync=2" & -check_query2="SELECT substr(latest_fail_reason, 1, 8) as ErrorCode FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001' AND ErrorCode != ''" +check_query2="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001' AND is_done = 0" query_result=`$CLICKHOUSE_CLIENT --query="$check_query2" 2>&1` -while [ -z "$query_result" ] +while [ "$query_result" == "0" ] do query_result=`$CLICKHOUSE_CLIENT --query="$check_query2" 2>&1` - sleep 0.1 + sleep 0.5 done -$CLICKHOUSE_CLIENT --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0_1', '20010101_0_0_0_1'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001' AND is_done = 0" -${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" +kill_message=$(${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'") wait +echo "$kill_message" + ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA test.kill_mutation_r1" ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA test.kill_mutation_r2" From 811d0e00d560d7d4f871cf525ec352df41a2c733 Mon Sep 17 00:00:00 2001 From: 
Alexander Kuzmenkov Date: Mon, 16 Mar 2020 17:54:17 +0300 Subject: [PATCH 008/115] performance comparison --- docker/test/performance-comparison/entrypoint.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 38fa967bd5f..330304547b7 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -22,19 +22,27 @@ function find_reference_sha # Go back from the revision to be tested, trying to find the closest published # testing release. - start_ref="$SHA_TO_TEST" + start_ref="$SHA_TO_TEST"~ # If we are testing a PR, and it merges with master successfully, we are # building and testing not the nominal last SHA specified by pull/.../head # and SHA_TO_TEST, but a revision that is merged with recent master, given # by pull/.../merge ref. + # Master is the first parent of the pull/.../merge. if git -C ch rev-parse pr/merge then - start_ref=pr/merge + start_ref=pr/merge~ fi while : do - ref_tag=$(git -C ch describe --match='v*-testing' --abbrev=0 --first-parent "$start_ref") + # FIXME the original idea was to compare to a closest testing tag, which + # is a version that is verified to work correctly. However, we're having + # some test stability issues now, and the testing release can't roll out + # for more than a week already because of that. Temporarily switch to + # using just closest master, so that we can go on. + #ref_tag=$(git -C ch describe --match='v*-testing' --abbrev=0 --first-parent "$start_ref") + ref_tag="$start_ref" + echo Reference tag is "$ref_tag" # We use annotated tags which have their own shas, so we have to further # dereference the tag to get the commit it points to, hence the '~0' thing.
From 5b54ef5bfd43844ee0d76e3b7ff6ab01329a4e56 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Mar 2020 17:59:11 +0300 Subject: [PATCH 009/115] Split long dictionaries test --- .../test.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 245370aa61c..62064cf7238 100644 --- a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -253,12 +253,15 @@ def test_simple_dictionaries(started_cluster, fold): assert node.query(query) == str(answer) + '\n' -def test_complex_dictionaries(started_cluster): +@pytest.mark.parametrize("fold", list(range(10))) +def test_complex_dictionaries(started_cluster, fold): fields = FIELDS["complex"] values = VALUES["complex"] data = [Row(fields, vals) for vals in values] - complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + all_complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + complex_dicts = get_dictionaries(fold, 10, all_complex_dicts) + for dct in complex_dicts: dct.load_data(data) @@ -283,12 +286,15 @@ def test_complex_dictionaries(started_cluster): assert node.query(query) == str(answer) + '\n' -def test_ranged_dictionaries(started_cluster): +@pytest.mark.parametrize("fold", list(range(10))) +def test_ranged_dictionaries(started_cluster, fold): fields = FIELDS["ranged"] values = VALUES["ranged"] data = [Row(fields, vals) for vals in values] - ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] + all_ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] + ranged_dicts = get_dictionaries(fold, 10, all_ranged_dicts) + for dct in ranged_dicts: dct.load_data(data) @@ -364,12 +370,14 @@ def test_key_value_simple_dictionaries(started_cluster, fold): assert node.query(query) == str(answer) + '\n' -def test_key_value_complex_dictionaries(started_cluster): +@pytest.mark.parametrize("fold", list(range(10))) +def test_key_value_complex_dictionaries(started_cluster, fold): fields = FIELDS["complex"] values = VALUES["complex"] data = [Row(fields, vals) for vals in values] - complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + all_complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + complex_dicts = get_dictionaries(fold, 10, all_complex_dicts) for dct in complex_dicts: dct.load_data(data) From 81bc57874e77d32e4e7a36225fef8d1ad4a1ab5d Mon Sep 17 00:00:00 2001 From: yonesko Date: Mon, 16 Mar 2020 18:02:03 +0300 Subject: [PATCH 010/115] Fix typo (#9683) --- docs/ru/operations/table_engines/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index b4262a468ba..2753156f8a8 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -170,7 +170,7 @@ Marks numbers: 0 1 2 3 4 5 6 7 8 Разреженный индекс допускает чтение лишних строк. При чтении одного диапазона первичного ключа, может быть прочитано до `index_granularity * 2` лишних строк в каждом блоке данных. -Разреженный индекс почти всегда помещаеся в оперативную память и поволяет работать с очень большим количеством строк в таблицах. 
+Разреженный индекс почти всегда помещаеся в оперативную память и позволяет работать с очень большим количеством строк в таблицах. ClickHouse не требует уникального первичного ключа. Можно вставить много строк с одинаковым первичным ключом. From 1c98210b715849754ed2cea17993df722de00917 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Mar 2020 18:09:20 +0300 Subject: [PATCH 011/115] Fix bug in integration test --- .../test_dictionaries_all_layouts_and_sources/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 62064cf7238..3a7d9106267 100644 --- a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -407,6 +407,6 @@ def test_key_value_complex_dictionaries(started_cluster, fold): for query in dct.get_select_get_or_default_queries(field, row): queries_with_answers.append((query, field.default_value_for_get)) - for query, answer in queries_with_answers: - print query - assert node.query(query) == str(answer) + '\n' + for query, answer in queries_with_answers: + print query + assert node.query(query) == str(answer) + '\n' From 1edf735e8da663c116bfe53d9cc1d80a600a5b60 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 16 Mar 2020 18:27:07 +0300 Subject: [PATCH 012/115] Update script for backporting. --- utils/github/__main__.py | 4 +++- utils/github/parser.py | 18 +++++++++++++++++- utils/github/query.py | 39 ++++++++++++++++++++++++--------------- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/utils/github/__main__.py b/utils/github/__main__.py index 920f382d613..10c92f681cc 100644 --- a/utils/github/__main__.py +++ b/utils/github/__main__.py @@ -49,7 +49,7 @@ parser.add_argument('--token', type=str, required=True, help='token for Github access') parser.add_argument('--login', type=str, help='filter authorship by login') -parser.add_argument('--auto-label', action='store_true', dest='autolabel', +parser.add_argument('--auto-label', action='store_true', dest='autolabel', default=True, help='try to automatically parse PR description and put labels') args = parser.parse_args() @@ -80,6 +80,8 @@ for i in reversed(range(len(stables))): members = set(github.get_members("ClickHouse", "ClickHouse")) def print_responsible(pull_request): + if "author" not in pull_request or pull_request["author"] is None: + return "No author" if pull_request["author"]["login"] in members: return colored(pull_request["author"]["login"], 'green') elif pull_request["mergedBy"]["login"] in members: diff --git a/utils/github/parser.py b/utils/github/parser.py index 77ad5a1b278..2f00cac9bb4 100644 --- a/utils/github/parser.py +++ b/utils/github/parser.py @@ -10,6 +10,10 @@ class Description: 'Performance Improvement': 'pr-performance', # 'Backward Incompatible Change': doesn't match anything 'Build/Testing/Packaging Improvement': 'pr-build', + 'Non-significant (changelog entry is not needed)': 'pr-non-significant', + 'Non-significant (changelog entry is not required)': 'pr-non-significant', + 'Non-significant': 'pr-non-significant', + 'Documentation (changelog entry is not required)': 'pr-documentation', # 'Other': doesn't match anything } @@ -37,8 +41,20 @@ class Description: if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en': self.legal = True - if stripped == 
'Category (leave one):': + category_headers = ( + 'Category (leave one):', + 'Changelog category (leave one):', + 'Changelog category:', + 'Category:' + ) + + if stripped in category_headers: next_category = True if category in Description.MAP_CATEGORY_TO_LABEL: self.label_name = Description.MAP_CATEGORY_TO_LABEL[category] + else: + if not category: + print('Cannot find category in pr description') + else: + print('Unknown category: ' + category) diff --git a/utils/github/query.py b/utils/github/query.py index f03cce744d3..6c22d3cfeb3 100644 --- a/utils/github/query.py +++ b/utils/github/query.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import requests +import time class Query: @@ -394,20 +395,28 @@ class Query: }} }} ''' - request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers) - if request.status_code == 200: - result = request.json() - if 'errors' in result: - raise Exception(f'Errors occured: {result["errors"]}') - if not is_mutation: - import inspect - caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3] - if caller not in self.api_costs.keys(): - self.api_costs[caller] = 0 - self.api_costs[caller] += result['data']['rateLimit']['cost'] + while True: + request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers) + if request.status_code == 200: + result = request.json() + if 'errors' in result: + raise Exception(f'Errors occured: {result["errors"]}') - return result['data'] - else: - import json - raise Exception(f'Query failed with code {request.status_code}:\n{json.dumps(request.json(), indent=4)}') + if not is_mutation: + import inspect + caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3] + if caller not in self.api_costs.keys(): + self.api_costs[caller] = 0 + self.api_costs[caller] += result['data']['rateLimit']['cost'] + + return result['data'] + else: + import json + resp = request.json() + if resp and len(resp) > 0 and resp[0] and 'type' in resp[0] and resp[0]['type'] == 'RATE_LIMITED': + print("API rate limit exceeded. Waiting for 1 second.") + time.sleep(1) + continue + + raise Exception(f'Query failed with code {request.status_code}:\n{json.dumps(resp, indent=4)}') From 18dcb193fd339ae44ac161d26e3522c1ab9988dc Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Mar 2020 21:57:01 +0300 Subject: [PATCH 013/115] More verbose message about error in case of exception during recursive remove --- dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 638fc5b0574..b5c8f16b7e5 100644 --- a/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -714,7 +714,7 @@ void IMergeTreeDataPart::remove() const } catch (...) { - LOG_ERROR(storage.log, "Cannot remove directory " << fullPath(disk, to_) << ". Check owner and access rights."); + LOG_ERROR(storage.log, "Cannot recursively remove directory " << fullPath(disk, to_) << ". 
Exception: " << getCurrentExceptionMessage(false)); throw; } } From f1f1c1c591b26497ad2639fd5d0a33be315f30d5 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 16 Mar 2020 16:49:51 -0300 Subject: [PATCH 014/115] extended test for deduplicate_blocks_in_dependent_materialized_views --- .../test_force_deduplication/test.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/test_force_deduplication/test.py b/dbms/tests/integration/test_force_deduplication/test.py index b4e90c2a792..7af12a8c949 100644 --- a/dbms/tests/integration/test_force_deduplication/test.py +++ b/dbms/tests/integration/test_force_deduplication/test.py @@ -24,15 +24,26 @@ def test_basic(start_cluster): node.query( ''' CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); - CREATE MATERIALIZED VIEW test_mv Engine=ReplicatedMergeTree ('/clickhouse/test/tables/test_mv','1') partition by A order by tuple() AS SELECT A FROM test; + CREATE MATERIALIZED VIEW test_mv_a Engine=ReplicatedMergeTree ('/clickhouse/test/tables/test_mv_a','1') order by tuple() AS SELECT A FROM test; + CREATE MATERIALIZED VIEW test_mv_b Engine=ReplicatedMergeTree ('/clickhouse/test/tables/test_mv_b','1') partition by A order by tuple() AS SELECT A FROM test; + CREATE MATERIALIZED VIEW test_mv_c Engine=ReplicatedMergeTree ('/clickhouse/test/tables/test_mv_c','1') order by tuple() AS SELECT A FROM test; + INSERT INTO test values(999); + INSERT INTO test values(999); SET max_partitions_per_insert_block = 3; INSERT INTO test SELECT number FROM numbers(10); ''' ) + assert int(node.query("SELECT count() FROM test")) == 11 + assert int(node.query("SELECT count() FROM test_mv_a")) == 11 + assert int(node.query("SELECT count() FROM test_mv_b")) == 1 + assert int(node.query("SELECT count() FROM test_mv_c")) == 1 + node.query("INSERT INTO test SELECT number FROM numbers(10)") - assert int(node.query("SELECT count() FROM test")) == 10 - assert int(node.query("SELECT count() FROM test_mv")) == 0 + assert int(node.query("SELECT count() FROM test")) == 11 + assert int(node.query("SELECT count() FROM test_mv_a")) == 11 + assert int(node.query("SELECT count() FROM test_mv_b")) == 1 + assert int(node.query("SELECT count() FROM test_mv_c")) == 1 node.query( ''' @@ -40,5 +51,7 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); ''' ) - assert int(node.query("SELECT count() FROM test")) == 10 - assert int(node.query("SELECT count() FROM test_mv")) == 10 + assert int(node.query("SELECT count() FROM test")) == 11 + assert int(node.query("SELECT count() FROM test_mv_a")) == 21 # first insert was succesfull with disabled dedup.. 
+ assert int(node.query("SELECT count() FROM test_mv_b")) == 11 + assert int(node.query("SELECT count() FROM test_mv_c")) == 11 From d6e843d1f4b897d370cc9a76d9d28c2358b006a8 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 16 Mar 2020 17:03:52 -0300 Subject: [PATCH 015/115] Update test.py --- .../test_force_deduplication/test.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dbms/tests/integration/test_force_deduplication/test.py b/dbms/tests/integration/test_force_deduplication/test.py index 7af12a8c949..0969f538988 100644 --- a/dbms/tests/integration/test_force_deduplication/test.py +++ b/dbms/tests/integration/test_force_deduplication/test.py @@ -55,3 +55,24 @@ def test_basic(start_cluster): assert int(node.query("SELECT count() FROM test_mv_a")) == 21 # first insert was succesfull with disabled dedup.. assert int(node.query("SELECT count() FROM test_mv_b")) == 11 assert int(node.query("SELECT count() FROM test_mv_c")) == 11 + + with pytest.raises(QueryRuntimeException): + node.query( + ''' + SET max_partitions_per_insert_block = 3; + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(100,10); + ''' + ) + + node.query( + ''' + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(100,10); + ''' + ) + + assert int(node.query("SELECT count() FROM test")) == 21 + assert int(node.query("SELECT count() FROM test_mv_a")) == 31 + assert int(node.query("SELECT count() FROM test_mv_b")) == 21 + assert int(node.query("SELECT count() FROM test_mv_c")) == 21 From 7a863390fcf839acdb9009d72cdc22867db0d7c7 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 16 Mar 2020 17:23:16 -0300 Subject: [PATCH 016/115] Update settings.md (#9688) insert_deduplicate / deduplicate_blocks_in_dependent_materialized_views en description --- docs/en/operations/settings/settings.md | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b4901458029..b10553cbe77 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -861,6 +861,37 @@ See also: - [insert_quorum](#settings-insert_quorum) - [insert_quorum_timeout](#settings-insert_quorum_timeout) +## insert_deduplicate {#settings-insert_deduplicate} + +Enables or disables block deduplication of `INSERT` (for Replicated* tables). + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication] (../ table_engines/replication.md). + +## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate_blocks_in_dependent_materialized_views} + +Enables or disables the deduplication check for materialized views that receive data from Replicated* tables. + +Possible values: + + 0 — Disabled. + 1 — Enabled. + +Default value: 0. + +Usage + +By default, deduplication is not performed for materialized views, but is done upstream, in the source table. +If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behavior exists to enable insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table. 
+At the same time, this behavior "breaks" `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows to change this behavior. On retry a materialized view will receive the repeat insert and will perform deduplication check by itself, +ignoring check result for the source table, and will insert rows lost because of first failure. + ## max_network_bytes {#settings-max_network_bytes} Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. From a3deb35329de31ae57109d4725c0bb045bc7b79c Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 16 Mar 2020 23:45:45 +0300 Subject: [PATCH 017/115] Update CODEOWNERS --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8e502c0b36f..be66f21b838 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,3 @@ docs/* @ClickHouse/docs docs/zh/* @ClickHouse/docs-zh +website/* @ClickHouse/docs From 552ecd6b68240875bcb37ff3d9745f46b5ae0f42 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 17 Mar 2020 00:55:51 +0300 Subject: [PATCH 018/115] Less spam in build.py output (#9694) --- docs/tools/build.py | 3 ++- website/images/clickhouse-black.svg | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index c4a1ff37d32..64c1b0e99c9 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -181,7 +181,8 @@ def build_single_page_version(lang, args, cfg): single_page_pdf = single_page_index_html.replace('index.html', 'clickhouse_%s.pdf' % lang) create_pdf_command = ['wkhtmltopdf', '--print-media-type', single_page_index_html, single_page_pdf] logging.debug(' '.join(create_pdf_command)) - subprocess.check_call(' '.join(create_pdf_command), shell=True) + with open(os.devnull, 'w') as devnull: + subprocess.check_call(' '.join(create_pdf_command), shell=True, stderr=devnull) with util.temp_dir() as test_dir: cfg.load_dict({ diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index a0a607dc0b2..695d0175685 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse \ No newline at end of file +ClickHouse From 41510275c5d3b0b230cf15862abc9f5e2454d50b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 17 Mar 2020 01:54:42 +0300 Subject: [PATCH 019/115] Small updates on roadmap --- docs/ru/extended_roadmap.md | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 5129b20b474..2dc1b36fd42 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -22,12 +22,14 @@ Upd. Доделывать будет другой человек. Приорит ### 1.2. Wait-free каталог баз данных. -Q1. Делает [Александр Токмаков](https://github.com/tavplubix), первый рабочий вариант в декабре 2019. Нужно для DataLens и Яндекс.Метрики. +Q2. Делает [Александр Токмаков](https://github.com/tavplubix), первый рабочий вариант в декабре 2019. Нужно для DataLens и Яндекс.Метрики. 
Манипуляции с каталогом баз данных: запросы CREATE TABLE, DROP TABLE, RENAME TABLE и DATABASE, требуют синхронизации с помощью блокировок. Эта синхронизация становится весьма сложной, так как на неё полагается много внутренних структур данных. Предлагается реализовать альтернативный подход, в котором таблицы и базы данных являются всего лишь ссылками на persistent объекты. Подробное описание задачи: [#6787](https://github.com/ClickHouse/ClickHouse/issues/6787) +Upd. Сделана крупная часть задачи, но ориентироваться стоит уже на Q2. + ### 1.3. Неблокирующие ALTER. Q1. И полностью immutable куски. Делает [Александр Сапин](https://github.com/alesapin). Готов приступить к задаче в конце ноября 2019. Нужно для Яндекс.Метрики. @@ -159,6 +161,8 @@ Upd. На данный момент исправляются проблемы с Upd. Включили по-умолчанию. Удаление старого кода не раньше, чем после первого релиза, в котором это включено по-умолчанию и всё ещё можно выключить обратно. +Upd. Уже есть первый релиз, в котором это включено по-умолчанию. + ### 2.2. Инфраструктура событий/метрик/ограничений/квот/трассировки. В очереди. https://gist.github.com/alexey-milovidov/d62d73222d83b9319dc519cbb13aeff6 @@ -185,6 +189,8 @@ Upd. Включили по-умолчанию. Удаление старого Александр Токмаков исправил множество проблем с использованием Context и сейчас переносит каталог БД наружу. +Upd. Каталог БД вынесен из Context. + ### 2.8. Декларативный парсер запросов. Средний приоритет. Нужно для YQL. @@ -379,12 +385,14 @@ Upd. Задача на финальной стадии разработки. ### 7.7. Доделать тесты под MSan. Уже есть ASan, TSan, UBSan. Не хватает тестов под MSan. Они уже добавлены в CI, но не проходят. -[Александр Кузьменков](https://github.com/akuzm). +[Александр Кузьменков](https://github.com/akuzm) и [Александр Токмаков](https://github.com/tavplubix). ### 7.8. Добавить clang-tidy. Уже есть PVS-Studio. Мы очень довольны, но этого недостаточно. +Upd. Алексей Миловидов. Добавлено некоторое множество проверок, но нужно рассмотреть все проверки подряд и добавить всё, что можно. + ### 7.9. Проверки на стиль имён с помощью clang-tidy. ### 7.10. Включение UBSan и MSan в интеграционных тестах. @@ -477,6 +485,8 @@ https://github.com/ClickHouse/ClickHouse/issues/8027#issuecomment-566670282 [Иван Лежанкин](https://github.com/abyss7). +Upd. В процессе реализации, есть pull request. + ### 7.21. Автосборка для Linux ppc64. [Иван Лежанкин](https://github.com/abyss7). @@ -554,6 +564,8 @@ Upd. Сергей Штыков сделал функцию `randomPrintableASCII UPD: Все патчи Максима отправлены в master. Задача взята в работу. +Upd: Задача в процессе реализации. Синхронизироваться будет master. Делает [Иван Лежанкин](https://github.com/abyss7) + ### 7.26. Побайтовая идентичность репозитория с Аркадией. Команда DevTools. Прогресс по задаче под вопросом. @@ -607,6 +619,9 @@ UPD: Все патчи Максима отправлены в master. Задач Есть жалобы на скорость загрузки и неудобство maintenance, operations, visibility. +Upd. Иван Блинков настроил CDN repo.clickhouse.tech, что решает проблему с доступностью зарубежом. +Вопрос с operations, visibility пока актуален. + ## 8. Интеграция с внешними системами. ### 8.1. Поддержка ALTER MODIFY SETTING для Kafka. @@ -687,6 +702,7 @@ Andrew Onyshchuk. Есть pull request. Q1. Сделано. ### 8.16.3. Поддержка формата MsgPack. Павел Круглов, ВШЭ и Яндекс. +Задача взята в работу. ### 8.16.4. Формат Regexp. @@ -815,6 +831,8 @@ Upd. Одну причину устранили, но ещё что-то неи ### 10.14. Поддержка всех типов в функции transform. +Задачу взяла Ольга Хвостикова. + ### 10.15. 
Использование словарей как специализированного layout для Join. ### 10.16. Словари на локальном SSD. @@ -840,9 +858,9 @@ Upd. Одну причину устранили, но ещё что-то неи Нужно разобраться, как упаковывать Java в статический бинарник, возможно AppImage. Или предоставить максимально простую инструкцию по установке jdbc-bridge. Может быть будет заинтересован Александр Крашенинников, Badoo, так как он разработал jdbc-bridge. -### 11.3. Интеграционные тесты ODBC драйвера путём подключения ClickHouse к самому себе через ODBC. +### 11.3. + Интеграционные тесты ODBC драйвера путём подключения ClickHouse к самому себе через ODBC. -Михаил Филимонов, Altinity. Есть почти готовый pull request. +Михаил Филимонов, Altinity. Готово. ### 11.4. Исправление упячек с типами Date и Decimal в clickhouse-cpp. @@ -877,6 +895,7 @@ zhang2014, есть pull request. [Виталий Баранов](https://github.com/vitlibar). Финальная стадия разработки, рабочая версия в начале февраля 2019. Q1. Сейчас сделаны все интерфейсы в коде и запросы, но не сделаны варианты хранения прав кроме прототипа. +Upd. Сделано хранение прав. До готового к использованию состояния осталось несколько доработок. ### 12.2. + Управление пользователями и правами доступа с помощью SQL запросов. @@ -927,7 +946,7 @@ Q1/Q2. ### 14.2. Поддержка WITH для подзапросов. -Павел Потёмкин, ВШЭ. +Михаил Коротов. ### 14.3. Поддержка подстановок для множеств в правой части IN. @@ -1058,6 +1077,7 @@ zhang2014 ### 16.3. Поддержка неконстантных аргументов с регулярными выражениями в функциях. Данила Кутенин, но только после секретного изменения в работе. +Upd. Секретного изменения в работе не будет, задачу будет делать другой человек. ### 16.4. Функция rowNumberForKey. From 04bed5f032847141f4805b385380baa266c27c1a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 17 Mar 2020 05:15:05 +0300 Subject: [PATCH 020/115] Add high-precision timestamp to trace_log --- dbms/programs/server/clickhouse-server.cpp | 24 +++++++++++++++++++++- dbms/src/Common/Stopwatch.h | 15 ++++++-------- dbms/src/Common/TraceCollector.cpp | 2 +- dbms/src/Interpreters/TraceLog.cpp | 2 ++ dbms/src/Interpreters/TraceLog.h | 1 + 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/dbms/programs/server/clickhouse-server.cpp b/dbms/programs/server/clickhouse-server.cpp index 7e0e114f742..58fa37990de 100644 --- a/dbms/programs/server/clickhouse-server.cpp +++ b/dbms/programs/server/clickhouse-server.cpp @@ -1,2 +1,24 @@ +#include + +#include + + int mainEntryClickHouseServer(int argc, char ** argv); -int main(int argc_, char ** argv_) { return mainEntryClickHouseServer(argc_, argv_); } + +/** + * This is the entry-point for the split build server. The initialization + * is copied from single-binary entry point in main.cpp. + */ +int main(int argc_, char ** argv_) +{ + /// Reset new handler to default (that throws std::bad_alloc) + /// It is needed because LLVM library clobbers it. + std::set_new_handler(nullptr); + + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. 
+ updatePHDRCache(); + + return mainEntryClickHouseServer(argc_, argv_); +} diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index db337355e2d..502cd2e9010 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -6,14 +6,11 @@ #include -namespace StopWatchDetail +inline UInt64 clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) { - inline UInt64 nanoseconds(clockid_t clock_type) - { - struct timespec ts; - clock_gettime(clock_type, &ts); - return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); - } + struct timespec ts; + clock_gettime(clock_type, &ts); + return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); } @@ -44,7 +41,7 @@ private: clockid_t clock_type; bool is_running = false; - UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type); } + UInt64 nanoseconds() const { return clock_gettime_ns(clock_type); } }; @@ -131,7 +128,7 @@ private: clockid_t clock_type; /// Most significant bit is a lock. When it is set, compareAndRestartDeferred method will return false. - UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type) & 0x7FFFFFFFFFFFFFFFULL; } + UInt64 nanoseconds() const { return clock_gettime_ns(clock_type) & 0x7FFFFFFFFFFFFFFFULL; } }; diff --git a/dbms/src/Common/TraceCollector.cpp b/dbms/src/Common/TraceCollector.cpp index 405cade2baf..399a2404b21 100644 --- a/dbms/src/Common/TraceCollector.cpp +++ b/dbms/src/Common/TraceCollector.cpp @@ -144,7 +144,7 @@ void TraceCollector::run() if (trace_log) { - TraceLogElement element{std::time(nullptr), trace_type, thread_id, query_id, trace, size}; + TraceLogElement element{std::time(nullptr), clock_gettime_ns(), trace_type, thread_id, query_id, trace, size}; trace_log->add(element); } } diff --git a/dbms/src/Interpreters/TraceLog.cpp b/dbms/src/Interpreters/TraceLog.cpp index 9bd3fdbbc53..724d9fed16c 100644 --- a/dbms/src/Interpreters/TraceLog.cpp +++ b/dbms/src/Interpreters/TraceLog.cpp @@ -24,6 +24,7 @@ Block TraceLogElement::createBlock() { {std::make_shared(), "event_date"}, {std::make_shared(), "event_time"}, + {std::make_shared(), "timestamp_ns"}, {std::make_shared(), "revision"}, {std::make_shared(trace_values), "trace_type"}, {std::make_shared(), "thread_id"}, @@ -41,6 +42,7 @@ void TraceLogElement::appendToBlock(Block & block) const columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); columns[i++]->insert(event_time); + columns[i++]->insert(timestamp_ns); columns[i++]->insert(ClickHouseRevision::get()); columns[i++]->insert(static_cast(trace_type)); columns[i++]->insert(thread_id); diff --git a/dbms/src/Interpreters/TraceLog.h b/dbms/src/Interpreters/TraceLog.h index 3eb0b1829e3..ca47566ac61 100644 --- a/dbms/src/Interpreters/TraceLog.h +++ b/dbms/src/Interpreters/TraceLog.h @@ -15,6 +15,7 @@ struct TraceLogElement static const TraceDataType::Values trace_values; time_t event_time{}; + UInt64 timestamp_ns{}; TraceType trace_type{}; UInt64 thread_id{}; String query_id{}; From f6d745de7f09e5df8e95b40e2958884873ba7fb8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 17 Mar 2020 05:41:47 +0300 Subject: [PATCH 021/115] performance comparison --- docker/test/performance-comparison/compare.sh | 7 +++++-- docker/test/performance-comparison/report.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 67af2ab340d..6cf8acbebec 100755 --- a/docker/test/performance-comparison/compare.sh +++ 
b/docker/test/performance-comparison/compare.sh @@ -246,11 +246,13 @@ function get_profiles right/clickhouse client --port 9001 --query "set query_profiler_real_time_period_ns = 0" left/clickhouse client --port 9001 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: & + left/clickhouse client --port 9001 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & left/clickhouse client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & left/clickhouse client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & left/clickhouse client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & right/clickhouse client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: & + right/clickhouse client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & right/clickhouse client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & right/clickhouse client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & right/clickhouse client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & @@ -382,8 +384,8 @@ create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, 'unstable- select v, n, query_id, query from (select - ['memory_usage', 'read_bytes', 'written_bytes'] n, - [memory_usage, read_bytes, written_bytes] v, + ['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n, + [memory_usage, read_bytes, written_bytes, query_duration_ms] v, query, query_id from right_query_log @@ -433,6 +435,7 @@ do query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g') grep -F "$query " stacks.rep \ | cut -d' ' -f 2- \ + | sed 's/\t/ /g' \ | tee "$query_file.stacks.rep" \ | ~/fg/flamegraph.pl > "$query_file.svg" & done diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 0491b61c530..fecf0804b6e 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -223,15 +223,15 @@ def print_test_times(): attrs = ['' for c in columns] for r in rows: - if float(r[6]) > 15: + if float(r[6]) > 22: + # FIXME should be 15s max -- investigate parallel_insert slow_average_tests += 1 attrs[6] = 'style="background: #ffb0a0"' else: attrs[6] = '' if float(r[5]) > 30: - # Just a hint for now. 
- # slow_average_tests += 1 + slow_average_tests += 1 attrs[5] = 'style="background: #ffb0a0"' else: attrs[5] = '' @@ -260,7 +260,7 @@ print(""" """) if slow_average_tests: - #status = 'failure' + status = 'failure' message_array.append(str(slow_average_tests) + ' too long') if faster_queries: From 1b7954cbaec24f26ab60f15481b2711d58c2d7d8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 17 Mar 2020 05:56:44 +0300 Subject: [PATCH 022/115] Faster performance test --- .../synthetic_hardware_benchmark.xml | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/dbms/tests/performance/synthetic_hardware_benchmark.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml index 22cb79dcc2b..9306302552c 100644 --- a/dbms/tests/performance/synthetic_hardware_benchmark.xml +++ b/dbms/tests/performance/synthetic_hardware_benchmark.xml @@ -19,44 +19,44 @@ Мы запускаем этот запрос и наблюдаем, с какой скоростью он выполняется. Через несколько секунд, когда скорость стабилизируется, прерываем выполнение. В качестве скорости выполнения запроса указывается количество обработанных исходных (прочитанных из таблицы) данных в единицу времени. Например, в таблице numbers читаемые нами данные - это числа типа UInt64 (8 байт). Если мы обрабатываем миллиард таких чисел в секунду, то отобразится скорость - 8 GB/sec. --> -SELECT count() FROM zeros(100000000) WHERE NOT ignore(rand()) -SELECT count() FROM zeros_mt(1600000000) WHERE NOT ignore(rand()) +SELECT count() FROM zeros( 10000000) WHERE NOT ignore(rand()) +SELECT count() FROM zeros_mt(160000000) WHERE NOT ignore(rand()) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash64(number)) -SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash64(number)) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(intHash64(number)) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(intHash64(number)) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash32(number)) -SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash32(number)) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(intHash32(number)) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(intHash32(number)) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(toString(number)) -SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(toString(number)) +SELECT count() FROM numbers( 1000000) WHERE NOT ignore(toString(number)) +SELECT count() FROM numbers_mt(16000000) WHERE NOT ignore(toString(number)) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(reinterpretAsString(number)) -SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(reinterpretAsString(number)) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(number / 7) -SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number / 7) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(number / 7) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(number / 7) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(number % 7) -SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 7) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(number % 7) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(number % 7) -SELECT count() FROM numbers(100000000) WHERE NOT ignore(number % 34908756) -SELECT 
count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 34908756) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(number % 34908756) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(number % 34908756) -SELECT number % 1000 AS k, count() FROM numbers(100000000) GROUP BY k -SELECT number % 1000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k +SELECT number % 1000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null +SELECT number % 1000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null -SELECT number % 100000 AS k, count() FROM numbers(100000000) GROUP BY k -SELECT number % 100000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k +SELECT number % 100000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null +SELECT number % 100000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null -SELECT number % 1000000 AS k, count() FROM numbers(100000000) GROUP BY k -SELECT number % 1000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k +SELECT number % 1000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null +SELECT number % 1000000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null -SELECT number % 10000000 AS k, count() FROM numbers(100000000) GROUP BY k -SELECT number % 10000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k +SELECT number % 10000000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null +SELECT number % 10000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k FORMAT Null -SELECT number % 500000000 AS k, count() FROM numbers(100000000) GROUP BY k -SELECT number % 500000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k +SELECT number % 500000000 AS k, count() FROM numbers( 1000000000) GROUP BY k FORMAT Null +SELECT number % 500000000 AS k, count() FROM numbers_mt(16000000000) GROUP BY k FORMAT Null From 13be2de4a425433c661306a34d8be10a8bc5eeb6 Mon Sep 17 00:00:00 2001 From: "imgbot[bot]" <31301654+imgbot[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2020 07:35:07 +0300 Subject: [PATCH 023/115] [ImgBot] Optimize images (#9695) /website/images/clickhouse-black.svg -- 4.33kb -> 4.33kb (0.02%) Signed-off-by: ImgBotApp Co-authored-by: ImgBotApp --- website/images/clickhouse-black.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index 695d0175685..a0a607dc0b2 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse +ClickHouse \ No newline at end of file From 1580ffb5072b8c2c11514e4a5a01c176e686eb0d Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 17 Mar 2020 12:22:30 +0300 Subject: [PATCH 024/115] Update success.html --- website/templates/index/success.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/index/success.html b/website/templates/index/success.html index a2831a44d74..be14ab33e58 100644 --- a/website/templates/index/success.html +++ b/website/templates/index/success.html @@ -7,7 +7,7 @@

 HTTP and DNS analytics
-by CloudFlare
+by Cloudflare
From 8e2e28c6517492d4d291c2a14d6c5f64b3ab3d07 Mon Sep 17 00:00:00 2001 From: Metikov Vadim Date: Tue, 17 Mar 2020 15:06:59 +0500 Subject: [PATCH 025/115] Update create.md Fixed one word --- docs/ru/query_language/create.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/create.md b/docs/ru/query_language/create.md index 74f1988aa8a..ec334533aef 100644 --- a/docs/ru/query_language/create.md +++ b/docs/ru/query_language/create.md @@ -164,7 +164,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе "Compressing Time Stamps" документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). -- `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициен сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе "Compressing Values" документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +- `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе "Compressing Values" документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. Кодеки `DoubleDelta` и `Gorilla` используются в TSDB Gorilla как компоненты алгоритма сжатия. Подход Gorilla эффективен в сценариях, когда данные представляют собой медленно изменяющиеся во времени величины. Метки времени эффективно сжимаются кодеком `DoubleDelta`, а значения кодеком `Gorilla`. 
Например, чтобы создать эффективно хранящуюся таблицу, используйте следующую конфигурацию: From 802b1e96c84f9732ce57a08182c9c2168f0de031 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Mar 2020 14:31:01 +0300 Subject: [PATCH 026/115] Support vX.X-conflicts tag in backport script. --- utils/github/__main__.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/utils/github/__main__.py b/utils/github/__main__.py index 920f382d613..0c46ac9d28d 100644 --- a/utils/github/__main__.py +++ b/utils/github/__main__.py @@ -34,7 +34,8 @@ except ImportError: CHECK_MARK = colored('🗸', 'green') CROSS_MARK = colored('🗙', 'red') -LABEL_MARK = colored('🏷', 'yellow') +BACKPORT_LABEL_MARK = colored('🏷', 'yellow') +CONFLICT_LABEL_MARK = colored('☁', 'yellow') CLOCK_MARK = colored('↻', 'cyan') @@ -126,12 +127,14 @@ if bad_commits and not args.login: # TODO: check backports. if need_backporting: re_vlabel = re.compile(r'^v\d+\.\d+$') + re_vlabel_conflicts = re.compile(r'^v\d+\.\d+-conflicts$') print('\nPull-requests need to be backported:') for pull_request in reversed(sorted(need_backporting, key=lambda x: x['number'])): targets = [] # use common list for consistent order in output good = set() - labeled = set() + backport_labeled = set() + conflict_labeled = set() wait = set() for stable in stables: @@ -143,7 +146,10 @@ if need_backporting: for label in github.get_labels(pull_request): if re_vlabel.match(label['name']): if f'v{stable[0]}' == label['name']: - labeled.add(stable[0]) + backport_labeled.add(stable[0]) + if re_vlabel_conflicts.match(label['name']): + if f'v{stable[0]}-conflicts' == label['name']: + conflict_labeled.add(stable[0]) for event in github.get_timeline(pull_request): if(event['isCrossRepository'] or @@ -165,7 +171,7 @@ if need_backporting: wait.add(event['source']['baseRefName']) # print pull-request's status - if len(good) + len(labeled) == len(targets): + if len(good) + len(backport_labeled) + len(conflict_labeled) == len(targets): print(f'{CHECK_MARK}', end=' ') else: print(f'{CROSS_MARK}', end=' ') @@ -173,8 +179,10 @@ if need_backporting: for target in targets: if target in good: print(f'\t{CHECK_MARK} {target}', end='') - elif target in labeled: - print(f'\t{LABEL_MARK} {target}', end='') + elif target in backport_labeled: + print(f'\t{BACKPORT_LABEL_MARK} {target}', end='') + elif target in conflict_labeled: + print(f'\t{CONFLICT_LABEL_MARK} {target}', end='') elif target in wait: print(f'\t{CLOCK_MARK} {target}', end='') else: @@ -185,7 +193,8 @@ if need_backporting: print('\nLegend:') print(f'{CHECK_MARK} - good') print(f'{CROSS_MARK} - bad') -print(f'{LABEL_MARK} - backport is detected via label') +print(f'{BACKPORT_LABEL_MARK} - backport is detected via label') +print(f'{CONFLICT_LABEL_MARK} - backport conflict is detected via label') print(f'{CLOCK_MARK} - backport is waiting to merge') # print API costs From 7aca050684e7cedce0549bc6724fa41eb397806b Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 17 Mar 2020 16:18:36 +0300 Subject: [PATCH 027/115] Add libcctz-dev package to Dockerfile --- docker/packager/deb/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 6b299982d44..763fcd486b0 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -64,7 +64,8 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ gdb \ pigz \ - moreutils + moreutils \ + libcctz-dev # 
Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. From 81631e4e6a38b2e6185b888a37fde7a0ba5a95f8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 17 Mar 2020 17:36:24 +0300 Subject: [PATCH 028/115] fixup --- .../synthetic_hardware_benchmark.xml | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/dbms/tests/performance/synthetic_hardware_benchmark.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml index 9306302552c..41ac7ef1dae 100644 --- a/dbms/tests/performance/synthetic_hardware_benchmark.xml +++ b/dbms/tests/performance/synthetic_hardware_benchmark.xml @@ -19,32 +19,32 @@ Мы запускаем этот запрос и наблюдаем, с какой скоростью он выполняется. Через несколько секунд, когда скорость стабилизируется, прерываем выполнение. В качестве скорости выполнения запроса указывается количество обработанных исходных (прочитанных из таблицы) данных в единицу времени. Например, в таблице numbers читаемые нами данные - это числа типа UInt64 (8 байт). Если мы обрабатываем миллиард таких чисел в секунду, то отобразится скорость - 8 GB/sec. --> -SELECT count() FROM zeros( 10000000) WHERE NOT ignore(rand()) -SELECT count() FROM zeros_mt(160000000) WHERE NOT ignore(rand()) +SELECT count() FROM zeros( 100000000) WHERE NOT ignore(rand()) +SELECT count() FROM zeros_mt(1600000000) WHERE NOT ignore(rand()) -SELECT count() FROM numbers( 10000000) WHERE NOT ignore(intHash64(number)) -SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(intHash64(number)) +SELECT count() FROM numbers( 100000000) WHERE NOT ignore(intHash64(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash64(number)) -SELECT count() FROM numbers( 10000000) WHERE NOT ignore(intHash32(number)) -SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(intHash32(number)) +SELECT count() FROM numbers( 100000000) WHERE NOT ignore(intHash32(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash32(number)) -SELECT count() FROM numbers( 1000000) WHERE NOT ignore(toString(number)) -SELECT count() FROM numbers_mt(16000000) WHERE NOT ignore(toString(number)) +SELECT count() FROM numbers( 10000000) WHERE NOT ignore(toString(number)) +SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(toString(number)) -SELECT count() FROM numbers( 10000000) WHERE NOT ignore(reinterpretAsString(number)) -SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM numbers( 100000000) WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(reinterpretAsString(number)) -SELECT count() FROM numbers( 10000000) WHERE NOT ignore(number / 7) -SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(number / 7) +SELECT count() FROM numbers( 100000000) WHERE NOT ignore(number / 7) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number / 7) -SELECT count() FROM numbers( 10000000) WHERE NOT ignore(number % 7) -SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(number % 7) +SELECT count() FROM numbers( 100000000) WHERE NOT ignore(number % 7) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 7) -SELECT count() FROM numbers( 10000000) WHERE NOT ignore(number % 34908756) -SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(number % 34908756) +SELECT count() FROM numbers( 100000000) WHERE NOT ignore(number % 34908756) 
+SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 34908756) -SELECT number % 1000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null -SELECT number % 1000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null +SELECT number % 1000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null +SELECT number % 1000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k FORMAT Null SELECT number % 100000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null SELECT number % 100000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null @@ -52,14 +52,14 @@ SELECT number % 1000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null SELECT number % 1000000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null -SELECT number % 10000000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null -SELECT number % 10000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k FORMAT Null +SELECT number % 10000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null +SELECT number % 10000000 AS k, count() FROM numbers_mt(80000000) GROUP BY k FORMAT Null -SELECT number % 500000000 AS k, count() FROM numbers( 1000000000) GROUP BY k FORMAT Null -SELECT number % 500000000 AS k, count() FROM numbers_mt(16000000000) GROUP BY k FORMAT Null +SELECT number % 500000000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null +SELECT number % 500000000 AS k, count() FROM numbers_mt(800000000) GROUP BY k FORMAT Null -SELECT count() FROM zeros(10000000) WHERE NOT ignore(materialize('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') AS s, concat(s,s,s,s,s,s,s,s,s,s) AS t, concat(t,t,t,t,t,t,t,t,t,t) AS u) SETTINGS max_block_size = 1000 +SELECT count() FROM zeros(1000000) WHERE NOT ignore(materialize('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') AS s, concat(s,s,s,s,s,s,s,s,s,s) AS t, concat(t,t,t,t,t,t,t,t,t,t) AS u) SETTINGS max_block_size = 1000 From 339e8180513ad1a7bc8161eacbbee93657c1abc9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Mar 2020 17:38:11 +0300 Subject: [PATCH 029/115] Parce commit message from merge commit name and search for it instead of commit number. --- utils/make_changelog.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/make_changelog.py b/utils/make_changelog.py index e87c125c1ac..825e8ecf080 100755 --- a/utils/make_changelog.py +++ b/utils/make_changelog.py @@ -131,6 +131,15 @@ def parse_original_commits_from_cherry_pick_message(commit_message): # Use GitHub search api to check if commit from any pull request. Update pull_requests info. def find_pull_request_for_commit(commit_info, pull_requests, token, max_retries, retry_timeout): commits = [commit_info['sha']] + parse_original_commits_from_cherry_pick_message(commit_info['commit']['message']) + + # Special case for cherry-picked merge commits without -x option. Parse pr number from commit message and search it. 
+    if commit_info['commit']['message'].startswith('Merge pull request'):
+        tokens = commit_info['commit']['message'][len('Merge pull request'):].split()
+        if len(tokens) > 0 and tokens[0].startswith('#'):
+            pr_number = tokens[0][1:]
+            if len(pr_number) > 0 and pr_number.isdigit():
+                commits = [pr_number]
+
     query = 'search/issues?q={}+type:pr+repo:{}&sort=created&order=asc'.format(' '.join(commits), repo)
     resp = github_api_get_json(query, token, max_retries, retry_timeout)

From c9840fa53248efee030435893651959f50f79f81 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 17 Mar 2020 17:56:47 +0300
Subject: [PATCH 030/115] Update CHANGELOG.md

---
 CHANGELOG.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 940eefdc3c6..c0667f6b8bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
 ## ClickHouse release v20.3

+### ClickHouse release v20.3.3.6, 2020-03-17
+
+### Bug Fix
+* Fixed incorrect internal function names for `sumKahan` and `sumWithOverflow`. It led to an exception when using these functions in remote queries. [#9636](https://github.com/ClickHouse/ClickHouse/pull/9636) ([Azat Khuzhin](https://github.com/azat))
+* Fixed the issue: timezone was not preserved if you write a simple arithmetic expression like `time + 1` (in contrast to an expression like `time + INTERVAL 1 SECOND`). This fixes [#5743](https://github.com/ClickHouse/ClickHouse/issues/5743). [#9323](https://github.com/ClickHouse/ClickHouse/pull/9323) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix possible exceptions `Size of filter doesn't match size of column` and `Invalid number of rows in Chunk` in `MergeTreeRangeReader`. They could appear while executing `PREWHERE` in some cases. Fixes [#9132](https://github.com/ClickHouse/ClickHouse/issues/9132). [#9612](https://github.com/ClickHouse/ClickHouse/pull/9612) ([Anton Popov](https://github.com/CurtizJ))
+* Allow `ALTER ON CLUSTER` of `Distributed` tables with internal replication. This fixes [#3268](https://github.com/ClickHouse/ClickHouse/issues/3268). [#9617](https://github.com/ClickHouse/ClickHouse/pull/9617) ([shinoi2](https://github.com/shinoi2))
+* Fix a bug in replication that doesn't allow replication to work if the user has executed mutations on the previous version. This fixes [#9645](https://github.com/ClickHouse/ClickHouse/issues/9645). [#9652](https://github.com/ClickHouse/ClickHouse/pull/9652) ([alesapin](https://github.com/alesapin))
+* Add setting `use_compact_format_in_distributed_parts_names` which allows writing files for `INSERT` queries into a `Distributed` table in a more compact format. This fixes [#9647](https://github.com/ClickHouse/ClickHouse/issues/9647). [#9653](https://github.com/ClickHouse/ClickHouse/pull/9653) ([alesapin](https://github.com/alesapin))
+
+
 ### ClickHouse release v20.3.2.1, 2020-03-12

 ### Backward Incompatible Change

From fe60870ed5ffd065c58f69b4dc7622015b1cfd42 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 17 Mar 2020 18:09:23 +0300
Subject: [PATCH 031/115] Increase cache purge threshold

---
 docs/tools/release.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tools/release.sh b/docs/tools/release.sh
index 649a5c7881b..37d39c01448 100755
--- a/docs/tools/release.sh
+++ b/docs/tools/release.sh
@@ -44,7 +44,7 @@ then
     if [[ !
-z "${CLOUDFLARE_TOKEN}" ]] then sleep 1m - git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>4) { url="https://'${BASE_DOMAIN}'/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE + git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>6) { url="https://'${BASE_DOMAIN}'/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE for FILENAME in $(ls PURGE*) do POST_DATA=$(cat "${FILENAME}" | sed -n -e 'H;${x;s/\n/,/g;s/^,//;p;}' | awk '{print "{\"files\":["$0"]}";}') From fd4a8bba99b359246838a04adf01e385bdc8f4f7 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 17 Mar 2020 18:24:11 +0300 Subject: [PATCH 032/115] DOCS-526: ifNotFinite docs. Ternary operator RU translation (#9650) - Documented ifNotFinite. - Translated ternary operator into Russian. --- .../functions/conditional_functions.md | 8 +++- .../functions/other_functions.md | 37 ++++++++++++++++++ .../functions/conditional_functions.md | 16 ++++++++ .../functions/other_functions.md | 38 +++++++++++++++++++ 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/docs/en/query_language/functions/conditional_functions.md b/docs/en/query_language/functions/conditional_functions.md index 31684701014..6822d40bb21 100644 --- a/docs/en/query_language/functions/conditional_functions.md +++ b/docs/en/query_language/functions/conditional_functions.md @@ -88,18 +88,22 @@ WHERE isNotNull(left) AND isNotNull(right) ``` Note: `NULL` values are not used in this example, check [NULL values in conditionals](#null-values-in-conditionals) section. -## Ternary operator +## Ternary Operator {#ternary-operator} It works same as `if` function. Syntax: `cond ? then : else` -Returns `then` if the `cond` is truthy(greater than zero), otherwise returns `else`. +Returns `then` if the `cond` evaluates to be true (greater than zero), otherwise returns `else`. * `cond` must be of type of `UInt8`, and `then` and `else` must have the lowest common type. * `then` and `else` can be `NULL` +**See also** + +- [ifNotFinite](other_functions.md#ifnotfinite). + ## multiIf Allows you to write the [CASE](../operators.md#operator_case) operator more compactly in the query. diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index e851cf804d8..24b6906b57e 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -178,6 +178,43 @@ Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is not Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is infinite, otherwise 0. Note that 0 is returned for a NaN. +## ifNotFinite {#ifnotfinite} + +Checks whether floating point value is finite. + +**Syntax** + +``` +ifNotFinite(x,y) +``` +**Parameters** + +- `x` — Value to be checked for infinity. Type: [Float*](../../data_types/float.md). +- `y` — Fallback value. Type: [Float*](../../data_types/float.md). + +**Returned value** + +- `x` if `x` is finite. +- `y` if `x` is not finite. + +**Example** + +Query: + +``` +SELECT 1/0 as infimum, ifNotFinite(infimum,42) +``` + +Result: + +``` +┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ +│ inf │ 42 │ +└─────────┴───────────────────────────────┘ +``` + +You can get similar result by using [ternary operator](conditional_functions.md#ternary-operator): `isFinite(x) ? x : y`. 
+ ## isNaN(x) Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is a NaN, otherwise 0. diff --git a/docs/ru/query_language/functions/conditional_functions.md b/docs/ru/query_language/functions/conditional_functions.md index ac7549e0ac8..2e0593c765b 100644 --- a/docs/ru/query_language/functions/conditional_functions.md +++ b/docs/ru/query_language/functions/conditional_functions.md @@ -52,6 +52,22 @@ SELECT if(0, plus(2, 2), plus(2, 6)) └────────────┘ ``` +## Тернарный оператор {#ternary-operator} + +Работает так же, как функция `if`. + +Синтаксис: `cond ? then : else` + +Возвращает `then`, если `cond` верно (больше нуля), в остальных случаях возвращает `else`. + +* `cond` должно быть типа `UInt8`, `then` и `else` должны относиться к наименьшему общему типу. + +* `then` и `else` могут быть `NULL`. + +**Смотрите также** + +- [ifNotFinite](other_functions.md#ifnotfinite). + ## multiIf Позволяет более компактно записать оператор [CASE](../operators.md#operator_case) в запросе. diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index e85eaac6f99..1b5bdafcc74 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -169,6 +169,44 @@ SELECT currentUser(); ## isFinite(x) Принимает Float32 или Float64 и возвращает UInt8, равный 1, если аргумент не бесконечный и не NaN, иначе 0. +## ifNotFinite {#ifnotfinite} + +Проверяет, является ли значение дробного числа с плавающей точкой конечным. + +**Синтаксис** + +``` +ifNotFinite(x,y) +``` + +**Параметры** + +- `x` — Значение, которое нужно проверить на бесконечность. Тип: [Float*](../../data_types/float.md). +- `y` — Запасное значение. Тип: [Float*](../../data_types/float.md). + +**Возвращаемые значения** + +- `x`, если `x` принимает конечное значение. +- `y`, если`x` принимает не конечное значение. + +**Пример** + +Запрос: + +``` +SELECT 1/0 as infimum, ifNotFinite(infimum,42) +``` + +Результат: + +``` +┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ +│ inf │ 42 │ +└─────────┴───────────────────────────────┘ +``` + +Аналогичный результат можно получить с помощью [тернарного оператора](conditional_functions.md#ternary-operator) `isFinite(x) ? x : y`. + ## isInfinite(x) Принимает Float32 или Float64 и возвращает UInt8, равный 1, если аргумент бесконечный, иначе 0. Отметим, что в случае NaN возвращается 0. From f4ea37fa3b877b1fbc6dc4e55c4cb185d9d17317 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 17 Mar 2020 19:37:09 +0300 Subject: [PATCH 033/115] peformance comparison --- docker/test/performance-comparison/compare.sh | 19 ++++++++++--------- docker/test/performance-comparison/report.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 6cf8acbebec..177ce3b9e2f 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -263,7 +263,8 @@ function get_profiles # Build and analyze randomization distribution for all queries. function analyze_queries { - ls ./*-queries.tsv | xargs -n1 -I% basename % -queries.tsv | \ + find . 
-maxdepth 1 -name "*-queries.tsv" -print | \ + xargs -n1 -I% basename % -queries.tsv | \ parallel --verbose right/clickhouse local --file "{}-queries.tsv" \ --structure "\"query text, run int, version UInt32, time float\"" \ --query "\"$(cat "$script_dir/eqmed.sql")\"" \ @@ -274,7 +275,7 @@ function analyze_queries function report { -for x in {right,left}-{addresses,{query,trace,metric}-log}.tsv +for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv do # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an # absolutely atrocious way. This should be done by the file() function itself. @@ -427,7 +428,7 @@ create table stacks engine File(TSV, 'stacks.rep') as join unstable_query_runs using query_id group by query, trace ; -" +" ||: IFS=$'\n' for query in $(cut -d' ' -f1 stacks.rep | sort | uniq) @@ -445,8 +446,6 @@ unset IFS # Remember that grep sets error code when nothing is found, hence the bayan # operator. grep -H -m2 'Exception:[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||: - -"$script_dir/report.py" > report.html } case "$stage" in @@ -462,23 +461,25 @@ case "$stage" in time restart ;& "run_tests") - # Ignore the errors to collect the log anyway + # Ignore the errors to collect the log and build at least some report, anyway time run_tests ||: ;& "get_profiles") # If the tests fail with OOM or something, still try to restart the servers # to collect the logs. Prefer not to restart, because addresses might change # and we won't be able to process trace_log data. - time get_profiles || restart || get_profiles + time get_profiles || restart || get_profiles ||: # Stop the servers to free memory for the subsequent query analysis. while killall clickhouse; do echo . ; sleep 1 ; done echo Servers stopped. ;& "analyze_queries") - time analyze_queries + time analyze_queries ||: ;& "report") - time report + time report ||: + + time "$script_dir/report.py" > report.html ;& esac diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index fecf0804b6e..df28251f015 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -152,7 +152,7 @@ def print_changes(): 'New, s', # 1 'Relative difference (new - old)/old', # 2 'Randomization distribution quantiles \ - [5%, 50%, 95%, 99%]', # 3 + [5%, 50%, 95%, 99%]', # 3 'Test', # 4 'Query', # 5 ] From 04241b9672341690de7b1435fc51643cfbcaf591 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Mar 2020 20:19:48 +0300 Subject: [PATCH 034/115] Create CHANGELOG_2019.md Move changelog for 2019 into a separate file. --- CHANGELOG_2019.md | 1945 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1945 insertions(+) create mode 100644 CHANGELOG_2019.md diff --git a/CHANGELOG_2019.md b/CHANGELOG_2019.md new file mode 100644 index 00000000000..def3e7baad0 --- /dev/null +++ b/CHANGELOG_2019.md @@ -0,0 +1,1945 @@ +## ClickHouse release v19.17 + +### ClickHouse release v19.17.6.36, 2019-12-27 + +#### Bug Fix +* Fixed potential buffer overflow in decompress. Malicious user can pass fabricated compressed data that could cause read after buffer. This issue was found by Eldar Zaitov from Yandex information security team. 
[#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed possible server crash (`std::terminate`) when the server cannot send or write data in JSON or XML format with values of String data type (that require UTF-8 validation) or when compressing result data with Brotli algorithm or in some other rare cases. [#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed dictionaries with source from a clickhouse `VIEW`, now reading such dictionaries doesn't cause the error `There is no query`. [#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed checking if a client host is allowed by host_regexp specified in users.xml. [#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar)) +* `RENAME TABLE` for a distributed table now renames the folder containing inserted data before sending to shards. This fixes an issue with successive renames `tableA->tableB`, `tableC->tableA`. [#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix)) +* `range_hashed` external dictionaries created by DDL queries now allow ranges of arbitrary numeric types. [#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin)) +* Fixed `INSERT INTO table SELECT ... FROM mysql(...)` table function. [#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix)) +* Fixed segfault in `INSERT INTO TABLE FUNCTION file()` while inserting into a file which doesn't exist. Now in this case file would be created and then insert would be processed. [#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia)) +* Fixed bitmapAnd error when intersecting an aggregated bitmap and a scalar bitmap. [#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432)) +* Fixed segfault when `EXISTS` query was used without `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. [#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed return type for functions `rand` and `randConstant` in case of nullable argument. Now functions always return `UInt32` and never `Nullable(UInt32)`. [#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed `DROP DICTIONARY IF EXISTS db.dict`, now it doesn't throw exception if `db` doesn't exist. [#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar)) +* If a table wasn't completely dropped because of server crash, the server will try to restore and load it [#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix)) +* Fixed a trivial count query for a distributed table if there are more than two shard local table. 
[#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu)) +* Fixed bug that lead to a data race in DB::BlockStreamProfileInfo::calculateRowsBeforeLimit() [#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz)) +* Fixed `ALTER table MOVE part` executed immediately after merging the specified part, which could cause moving a part which the specified part merged into. Now it correctly moves the specified part. [#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Expressions for dictionaries can be specified as strings now. This is useful for calculation of attributes while extracting data from non-ClickHouse sources because it allows to use non-ClickHouse syntax for those expressions. [#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin)) +* Fixed a very rare race in `clickhouse-copier` because of an overflow in ZXid. [#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009)) +* Fixed the bug when after the query failed (due to "Too many simultaneous queries" for example) it would not read external tables info, and the +next request would interpret this info as the beginning of the next query causing an error like `Unknown packet from client`. [#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat)) +* Avoid null dereference after "Unknown packet X from server" [#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat)) +* Restore support of all ICU locales, add the ability to apply collations for constant expressions and add language name to system.collations table. [#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin)) +* Number of streams for read from `StorageFile` and `StorageHDFS` is now limited, to avoid exceeding the memory limit. [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin)) +* Fixed `CHECK TABLE` query for `*MergeTree` tables without key. [#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin)) +* Removed the mutation number from a part name in case there were no mutations. This removing improved the compatibility with older versions. [#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin)) +* Fixed the bug that mutations are skipped for some attached parts due to their data_version are larger than the table mutation version. [#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang)) +* Allow starting the server with redundant copies of parts after moving them to another device. [#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fixed the error "Sizes of columns doesn't match" that might appear when using aggregate function columns. [#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea)) +* Now an exception will be thrown in case of using WITH TIES alongside LIMIT BY. And now it's possible to use TOP with LIMIT BY. 
[#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) +* Fix dictionary reload if it has `invalidate_query`, which stopped updates and some exception on previous update tries. [#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin)) + +### ClickHouse release v19.17.4.11, 2019-11-22 + +#### Backward Incompatible Change +* Using column instead of AST to store scalar subquery results for better performance. Setting `enable_scalar_subquery_optimization` was added in 19.17 and it was enabled by default. It leads to errors like [this](https://github.com/ClickHouse/ClickHouse/issues/7851) during upgrade to 19.17.2 or 19.17.3 from previous versions. This setting was disabled by default in 19.17.4, to make possible upgrading from 19.16 and older versions without errors. [#7392](https://github.com/ClickHouse/ClickHouse/pull/7392) ([Amos Bird](https://github.com/amosbird)) + +#### New Feature +* Add the ability to create dictionaries with DDL queries. [#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([alesapin](https://github.com/alesapin)) +* Make `bloom_filter` type of index supporting `LowCardinality` and `Nullable` [#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Add function `isValidJSON` to check that passed string is a valid json. [#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir)) +* Implement `arrayCompact` function [#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([Memo](https://github.com/Joeywzr)) +* Created function `hex` for Decimal numbers. It works like `hex(reinterpretAsString())`, but doesn't delete last zero bytes. [#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([Mikhail Korotov](https://github.com/millb)) +* Add `arrayFill` and `arrayReverseFill` functions, which replace elements by other elements in front/back of them in the array. [#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz)) +* Add `CRC32IEEE()`/`CRC64()` support [#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat)) +* Implement `char` function similar to one in [mysql](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char) [#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundyli](https://github.com/sundy-li)) +* Add `bitmapTransform` function. It transforms an array of values in a bitmap to another array of values, the result is a new bitmap [#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([Zhichang Yu](https://github.com/yuzhichang)) +* Implemented `javaHashUTF16LE()` function [#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab)) +* Add `_shard_num` virtual column for the Distributed engine [#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat)) + +#### Experimental Feature +* Support for processors (new query execution pipeline) in `MergeTree`. 
[#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) + +#### Bug Fix +* Fix incorrect float parsing in `Values` [#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix)) +* Fix rare deadlock which can happen when trace_log is enabled. [#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov)) +* Prevent message duplication when producing Kafka table has any MVs selecting from it [#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7)) +* Support for `Array(LowCardinality(Nullable(String)))` in `IN`. Resolves [#7364](https://github.com/ClickHouse/ClickHouse/issues/7364) [#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab)) +* Add handling of `SQL_TINYINT` and `SQL_BIGINT`, and fix handling of `SQL_FLOAT` data source types in ODBC Bridge. [#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon)) +* Fix aggregation (`avg` and quantiles) over empty decimal columns [#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([Andrey Konyaev](https://github.com/akonyaev90)) +* Fix `INSERT` into Distributed with `MATERIALIZED` columns [#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat)) +* Make `MOVE PARTITION` work if some parts of partition are already on destination disk or volume [#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fixed bug with hardlinks failing to be created during mutations in `ReplicatedMergeTree` in multi-disk configurations. [#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fixed a bug with a mutation on a MergeTree when whole part remains unchanged and best space is being found on another disk [#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fixed bug with `keep_free_space_ratio` not being read from disks configuration [#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fix bug with table contains only `Tuple` columns or columns with complex paths. Fixes [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([alesapin](https://github.com/alesapin)) +* Do not account memory for Buffer engine in max_memory_usage limit [#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat)) +* Fix final mark usage in `MergeTree` tables ordered by `tuple()`. In rare cases it could lead to `Can't adjust last granule` error while select. [#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([Anton Popov](https://github.com/CurtizJ)) +* Fix bug in mutations that have predicate with actions that require context (for example functions for json), which may lead to crashes or strange exceptions. 
[#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([alesapin](https://github.com/alesapin)) +* Fix mismatch of database and table names escaping in `data/` and `shadow/` directories [#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak)) +* Support duplicated keys in RIGHT|FULL JOINs, e.g. ```ON t.x = u.x AND t.x = u.y```. Fix crash in this case. [#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix `Not found column in block` when joining on expression with RIGHT or FULL JOIN. [#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuikov](https://github.com/4ertus2)) +* One more attempt to fix infinite loop in `PrettySpace` format [#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia)) +* Fix bug in `concat` function when all arguments were `FixedString` of the same size. [#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([alesapin](https://github.com/alesapin)) +* Fixed exception in case of using 1 argument while defining S3, URL and HDFS storages. [#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fix scope of the InterpreterSelectQuery for views with query [#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat)) + +#### Improvement +* `Nullable` columns recognized and NULL-values handled correctly by ODBC-bridge [#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk)) +* Write current batch for distributed send atomically [#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat)) +* Throw an exception if we cannot detect table for column name in query. [#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuikov](https://github.com/4ertus2)) +* Add `merge_max_block_size` setting to `MergeTreeSettings` [#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuikov](https://github.com/4ertus2)) +* Queries with `HAVING` and without `GROUP BY` assume group by constant. So, `SELECT 1 HAVING 1` now returns a result. [#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([Amos Bird](https://github.com/amosbird)) +* Support parsing `(X,)` as tuple similar to python. [#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([Amos Bird](https://github.com/amosbird)) +* Make `range` function behaviors almost like pythonic one. [#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundyli](https://github.com/sundy-li)) +* Add `constraints` columns to table `system.settings` [#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([Vitaly Baranov](https://github.com/vitlibar)) +* Better Null format for tcp handler, so that it's possible to use `select ignore() from table format Null` for perf measure via clickhouse-client [#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([Amos Bird](https://github.com/amosbird)) +* Queries like `CREATE TABLE ... AS (SELECT (1, 2))` are parsed correctly [#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz)) + +#### Performance Improvement +* The performance of aggregation over short string keys is improved. 
[#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird)) +* Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird)) +* Use storage meta info to evaluate trivial `SELECT count() FROM table;` [#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov)) +* Vectorize processing `arrayReduce` similar to Aggregator `addBatch`. [#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird)) +* Minor improvements in performance of `Kafka` consumption [#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7)) + +#### Build/Testing/Packaging Improvement +* Add support for cross-compiling to the CPU architecture AARCH64. Refactor packager script. [#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([Ivan](https://github.com/abyss7)) +* Unpack darwin-x86_64 and linux-aarch64 toolchains into mounted Docker volume when building packages [#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([Ivan](https://github.com/abyss7)) +* Update Docker Image for Binary Packager [#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([Ivan](https://github.com/abyss7)) +* Fixed compile errors on MacOS Catalina [#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([Ernest Poletaev](https://github.com/ernestp)) +* Some refactoring in query analysis logic: split complex class into several simple ones. [#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix build without submodules [#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller)) +* Better `add_globs` in CMake files [#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([Amos Bird](https://github.com/amosbird)) +* Remove hardcoded paths in `unwind` target [#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok)) +* Allow to use mysql format without ssl [#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller)) + +#### Other +* Added ANTLR4 grammar for ClickHouse SQL dialect [#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +## ClickHouse release v19.16 + +#### Clickhouse release v19.16.14.65, 2020-03-05 + +* Fix distributed subqueries incompatibility with older CH versions. Fixes [#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) +[(tabplubix)](https://github.com/tavplubix) +* When executing `CREATE` query, fold constant expressions in storage engine arguments. Replace empty database name with current database. Fixes [#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix check for local address in `ClickHouseDictionarySource`. 
+[#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tabplubix)](https://github.com/tavplubix) +* Now background merges in `*MergeTree` table engines family preserve storage policy volume order more accurately. +[#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Prevent losing data in `Kafka` in rare cases when exception happens after reading suffix but before commit. Fixes [#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) +[#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov) +* Fix bug leading to server termination when trying to use / drop `Kafka` table created with wrong parameters. Fixes [#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). +[#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov) +* Allow using `MaterializedView` with subqueries above `Kafka` tables. +[#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov)) + +#### New Feature +* Add `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity. +[#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) + +### ClickHouse release v19.16.2.2, 2019-10-30 + +#### Backward Incompatible Change +* Add missing arity validation for count/counIf. + [#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) +[#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir)) +* Remove legacy `asterisk_left_columns_only` setting (it was disabled by default). + [#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem +Zuikov](https://github.com/4ertus2)) +* Format strings for Template data format are now specified in files. + [#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) +([tavplubix](https://github.com/tavplubix)) + +#### New Feature +* Introduce uniqCombined64() to calculate cardinality greater than UINT_MAX. + [#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), +[#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat +Khuzhin](https://github.com/azat)) +* Support Bloom filter indexes on Array columns. + [#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) +([achimbab](https://github.com/achimbab)) +* Add a function `getMacro(name)` that returns String with the value of corresponding `` + from server configuration. [#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) +([alexey-milovidov](https://github.com/alexey-milovidov)) +* Set two configuration options for a dictionary based on an HTTP source: `credentials` and + `http-headers`. [#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume +Tassery](https://github.com/YiuRULE)) +* Add a new ProfileEvent `Merge` that counts the number of launched background merges. + [#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail +Korotov](https://github.com/millb)) +* Add fullHostName function that returns a fully qualified domain name. 
+ [#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) +[#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundyli](https://github.com/sundy-li)) +* Add function `arraySplit` and `arrayReverseSplit` which split an array by "cut off" + conditions. They are useful in time sequence handling. +[#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz)) +* Add new functions that return the Array of all matched indices in multiMatch family of functions. + [#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila +Kutenin](https://github.com/danlark1)) +* Add a new database engine `Lazy` that is optimized for storing a large number of small -Log + tables. [#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita +Vasilev](https://github.com/nikvas0)) +* Add aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang +Yu](https://github.com/yuzhichang)) +* Add aggregate function combinators -OrNull and -OrDefault, which return null + or default values when there is nothing to aggregate. +[#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) +([hcz](https://github.com/hczhcz)) +* Introduce CustomSeparated data format that supports custom escaping and + delimiter rules. [#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) +([tavplubix](https://github.com/tavplubix)) +* Support Redis as source of external dictionary. [#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton +Popov](https://github.com/CurtizJ)) + +#### Bug Fix +* Fix wrong query result if it has `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is + used. [#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton +Popov](https://github.com/CurtizJ)) +* Disabled MariaDB authentication plugin, which depends on files outside of project. + [#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy +Baranov](https://github.com/yurriy)) +* Fix exception `Cannot convert column ... because it is constant but values of constants are + different in source and result` which could rarely happen when functions `now()`, `today()`, +`yesterday()`, `randConstant()` are used. +[#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai +Kochetov](https://github.com/KochetovNicolai)) +* Fixed issue of using HTTP keep alive timeout instead of TCP keep alive timeout. + [#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily +Nemkov](https://github.com/Enmk)) +* Fixed a segmentation fault in groupBitmapOr (issue [#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). + [#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang +Yu](https://github.com/yuzhichang)) +* For materialized views the commit for Kafka is called after all data were written. + [#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7)) +* Fixed wrong `duration_ms` value in `system.part_log` table. It was ten times off. + [#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir +Chebotarev](https://github.com/excitoon)) +* A quick fix to resolve crash in LIVE VIEW table and re-enabling all LIVE VIEW tests. + [#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) +([vzakaznikov](https://github.com/vzakaznikov)) +* Serialize NULL values correctly in min/max indexes of MergeTree parts. 
+ [#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander +Kuzmenkov](https://github.com/akuzm)) +* Don't put virtual columns to .sql metadata when table is created as `CREATE TABLE AS`. + [#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7)) +* Fix segmentation fault in `ATTACH PART` query. + [#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) +([alesapin](https://github.com/alesapin)) +* Fix wrong result for some queries given by the optimization of empty IN subqueries and empty + INNER/RIGHT JOIN. [#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai +Kochetov](https://github.com/KochetovNicolai)) +* Fixing AddressSanitizer error in the LIVE VIEW getHeader() method. + [#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) +([vzakaznikov](https://github.com/vzakaznikov)) + +#### Improvement +* Add a message in case of queue_wait_max_ms wait takes place. + [#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat +Khuzhin](https://github.com/azat)) +* Made setting `s3_min_upload_part_size` table-level. + [#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir +Chebotarev](https://github.com/excitoon)) +* Check TTL in StorageFactory. [#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) + ([sundyli](https://github.com/sundy-li)) +* Squash left-hand blocks in partial merge join (optimization). + [#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem +Zuikov](https://github.com/4ertus2)) +* Do not allow non-deterministic functions in mutations of Replicated table engines, because this + can introduce inconsistencies between replicas. +[#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander +Kazakov](https://github.com/Akazz)) +* Disable memory tracker while converting exception stack trace to string. It can prevent the loss + of error messages of type `Memory limit exceeded` on server, which caused the `Attempt to read +after eof` exception on client. [#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) +([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Miscellaneous format improvements. Resolves + [#6033](https://github.com/ClickHouse/ClickHouse/issues/6033), +[#2633](https://github.com/ClickHouse/ClickHouse/issues/2633), +[#6611](https://github.com/ClickHouse/ClickHouse/issues/6611), +[#6742](https://github.com/ClickHouse/ClickHouse/issues/6742) +[#7215](https://github.com/ClickHouse/ClickHouse/pull/7215) +([tavplubix](https://github.com/tavplubix)) +* ClickHouse ignores values on the right side of IN operator that are not convertible to the left + side type. Make it work properly for compound types -- Array and Tuple. +[#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander +Kuzmenkov](https://github.com/akuzm)) +* Support missing inequalities for ASOF JOIN. It's possible to join less-or-equal variant and strict + greater and less variants for ASOF column in ON syntax. +[#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem +Zuikov](https://github.com/4ertus2)) +* Optimize partial merge join. [#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) + ([Artem Zuikov](https://github.com/4ertus2)) +* Do not use more than 98K of memory in uniqCombined functions. 
+ [#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), +[#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat +Khuzhin](https://github.com/azat)) +* Flush parts of right-hand joining table on disk in PartialMergeJoin (if there is not enough + memory). Load data back when needed. [#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) +([Artem Zuikov](https://github.com/4ertus2)) + +#### Performance Improvement +* Speed up joinGet with const arguments by avoiding data duplication. + [#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos +Bird](https://github.com/amosbird)) +* Return early if the subquery is empty. + [#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu)) +* Optimize parsing of SQL expression in Values. + [#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) +([tavplubix](https://github.com/tavplubix)) + +#### Build/Testing/Packaging Improvement +* Disable some contribs for cross-compilation to Mac OS. + [#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7)) +* Add missing linking with PocoXML for clickhouse_common_io. + [#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat +Khuzhin](https://github.com/azat)) +* Accept multiple test filter arguments in clickhouse-test. + [#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander +Kuzmenkov](https://github.com/akuzm)) +* Enable musl and jemalloc for ARM. [#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) + ([Amos Bird](https://github.com/amosbird)) +* Added `--client-option` parameter to `clickhouse-test` to pass additional parameters to client. + [#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai +Kochetov](https://github.com/KochetovNicolai)) +* Preserve existing configs on rpm package upgrade. + [#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) +([filimonov](https://github.com/filimonov)) +* Fix errors detected by PVS. [#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem + Zuikov](https://github.com/4ertus2)) +* Fix build for Darwin. [#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) + ([Ivan](https://github.com/abyss7)) +* glibc 2.29 compatibility. [#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos + Bird](https://github.com/amosbird)) +* Make sure dh_clean does not touch potential source files. + [#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos +Bird](https://github.com/amosbird)) +* Attempt to avoid conflict when updating from altinity rpm - it has config file packaged separately + in clickhouse-server-common. [#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) +([filimonov](https://github.com/filimonov)) +* Optimize some header files for faster rebuilds. + [#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), +[#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander +Kuzmenkov](https://github.com/akuzm)) +* Add performance tests for Date and DateTime. [#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily + Nemkov](https://github.com/Enmk)) +* Fix some tests that contained non-deterministic mutations. + [#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander +Kazakov](https://github.com/Akazz)) +* Add build with MemorySanitizer to CI. [#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) + ([Alexander Kuzmenkov](https://github.com/akuzm)) +* Avoid use of uninitialized values in MetricsTransmitter. 
+ [#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat +Khuzhin](https://github.com/azat)) +* Fix some issues in Fields found by MemorySanitizer. + [#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), +[#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander +Kuzmenkov](https://github.com/akuzm)), [#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) +([Amos Bird](https://github.com/amosbird)) +* Fix undefined behavior in murmurhash32. [#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos + Bird](https://github.com/amosbird)) +* Fix undefined behavior in StoragesInfoStream. [#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) + ([tavplubix](https://github.com/tavplubix)) +* Fixed constant expressions folding for external database engines (MySQL, ODBC, JDBC). In previous + versions it wasn't working for multiple constant expressions and was not working at all for Date, +DateTime and UUID. This fixes [#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) +[#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) +([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixing ThreadSanitizer data race error in the LIVE VIEW when accessing no_users_thread variable. + [#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) +([vzakaznikov](https://github.com/vzakaznikov)) +* Get rid of malloc symbols in libcommon + [#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), +[#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos +Bird](https://github.com/amosbird)) +* Add global flag ENABLE_LIBRARIES for disabling all libraries. + [#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) +([proller](https://github.com/proller)) + +#### Code cleanup +* Generalize configuration repository to prepare for DDL for Dictionaries. [#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) + ([alesapin](https://github.com/alesapin)) +* Parser for dictionaries DDL without any semantic. + [#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) +([alesapin](https://github.com/alesapin)) +* Split ParserCreateQuery into different smaller parsers. + [#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) +([alesapin](https://github.com/alesapin)) +* Small refactoring and renaming near external dictionaries. + [#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) +([alesapin](https://github.com/alesapin)) +* Refactor some code to prepare for role-based access control. [#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly + Baranov](https://github.com/vitlibar)) +* Some improvements in DatabaseOrdinary code. + [#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita +Vasilev](https://github.com/nikvas0)) +* Do not use iterators in find() and emplace() methods of hash tables. +[#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander +Kuzmenkov](https://github.com/akuzm)) +* Fix getMultipleValuesFromConfig in case when parameter root is not empty. [#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) +([Mikhail Korotov](https://github.com/millb)) +* Remove some copy-paste (TemporaryFile and TemporaryFileStream) + [#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem +Zuikov](https://github.com/4ertus2)) +* Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`). 
+ [#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir +Chebotarev](https://github.com/excitoon)) +* Wait for all scheduled jobs, which are using local objects, if `ThreadPool::schedule(...)` throws + an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and +fix comments to make obvious that it may throw. +[#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) +([tavplubix](https://github.com/tavplubix)) + +## ClickHouse release 19.15 + +### ClickHouse release 19.15.4.10, 2019-10-31 + +#### Bug Fix +* Added handling of SQL_TINYINT and SQL_BIGINT, and fix handling of SQL_FLOAT data source types in ODBC Bridge. +[#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon)) +* Allowed to have some parts on destination disk or volume in MOVE PARTITION. +[#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fixed NULL-values in nullable columns through ODBC-bridge. +[#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk)) +* Fixed INSERT into Distributed non local node with MATERIALIZED columns. +[#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat)) +* Fixed function getMultipleValuesFromConfig. +[#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb)) +* Fixed issue of using HTTP keep alive timeout instead of TCP keep alive timeout. +[#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk)) +* Wait for all jobs to finish on exception (fixes rare segfaults). +[#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix)) +* Don't push to MVs when inserting into Kafka table. +[#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7)) +* Disable memory tracker for exception stack. +[#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed bad code in transforming query for external database. +[#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Avoid use of uninitialized values in MetricsTransmitter. +[#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat)) +* Added example config with macros for tests ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.15.3.6, 2019-10-09 + +#### Bug Fix +* Fixed bad_variant in hashed dictionary. +([alesapin](https://github.com/alesapin)) +* Fixed up bug with segmentation fault in ATTACH PART query. +([alesapin](https://github.com/alesapin)) +* Fixed time calculation in `MergeTreeData`. +([Vladimir Chebotarev](https://github.com/excitoon)) +* Commit to Kafka explicitly after the writing is finalized. +[#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7)) +* Serialize NULL values correctly in min/max indexes of MergeTree parts. +[#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm)) + +### ClickHouse release 19.15.2.2, 2019-10-01 + +#### New Feature +* Tiered storage: support to use multiple storage volumes for tables with MergeTree engine. It's possible to store fresh data on SSD and automatically move old data to HDD. 
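+A minimal sketch of using tiered storage (the policy and volume names are hypothetical and must be declared beforehand in the server's `storage_configuration` section of `config.xml`):
+```sql
+-- 'ssd_to_hdd' is an assumed storage policy with a fast volume and a slow 'cold' volume.
+CREATE TABLE hits_tiered
+(
+    EventDate Date,
+    UserID UInt64
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(EventDate)
+ORDER BY (EventDate, UserID)
+SETTINGS storage_policy = 'ssd_to_hdd';
+
+-- Old data can be moved to the slower volume explicitly:
+ALTER TABLE hits_tiered MOVE PARTITION 201909 TO VOLUME 'cold';
+```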
([example](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Igr](https://github.com/ObjatieGroba)) [#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([alesapin](https://github.com/alesapin)) +* Add table function `input` for reading incoming data in `INSERT SELECT` query. [#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([palasonic1](https://github.com/palasonic1)) [#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Anton Popov](https://github.com/CurtizJ)) +* Add a `sparse_hashed` dictionary layout that is functionally equivalent to the `hashed` layout, but is more memory efficient. It uses about half as much memory at the cost of slower value retrieval. [#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Azat Khuzhin](https://github.com/azat)) +* Implement the ability to define a list of users with access to dictionaries. Only the currently connected database can be used. [#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Add `LIMIT` option to `SHOW` query. [#6944](https://github.com/ClickHouse/ClickHouse/pull/6944) ([Philipp Malkovsky](https://github.com/malkfilipp)) +* Add `bitmapSubsetLimit(bitmap, range_start, limit)` function that returns a subset of the smallest `limit` values in the set that are no smaller than `range_start`. [#6957](https://github.com/ClickHouse/ClickHouse/pull/6957) ([Zhichang Yu](https://github.com/yuzhichang)) +* Add `bitmapMin` and `bitmapMax` functions. [#6970](https://github.com/ClickHouse/ClickHouse/pull/6970) ([Zhichang Yu](https://github.com/yuzhichang)) +* Add function `repeat` related to [issue-6648](https://github.com/ClickHouse/ClickHouse/issues/6648) [#6999](https://github.com/ClickHouse/ClickHouse/pull/6999) ([flynn](https://github.com/ucasFL)) + +#### Experimental Feature +* Implement (in memory) Merge Join variant that does not change the current pipeline. The result is partially sorted by merge key. Set `partial_merge_join = 1` to use this feature. The Merge Join is still in development. [#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2)) +* Add `S3` engine and table function. It is still in development (no authentication support yet). [#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon)) + +#### Improvement +* Every message read from Kafka is inserted atomically. This resolves almost all known issues with the Kafka engine. [#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7)) +* Improvements for failover of Distributed queries. Recovery time is shortened; it is also now configurable and can be seen in `system.clusters`. [#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk)) +* Support numeric values for Enums directly in `IN` section (see the sketch below). #6766 [#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000)) +* Support (optional, disabled by default) redirects on URL storage. [#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll)) +* Add an informational message when a client with an older version connects to a server.
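+A minimal sketch of the Enum-in-`IN` improvement noted a few items above (table and values are hypothetical):
+```sql
+CREATE TABLE events_enum (status Enum8('ok' = 1, 'fail' = 2, 'skip' = 3)) ENGINE = Memory;
+INSERT INTO events_enum VALUES ('ok'), ('fail'), ('skip');
+-- Numeric Enum values can now be used directly on the right side of IN:
+SELECT count() FROM events_enum WHERE status IN (1, 3);
+```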
[#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp)) +* Remove maximum backoff sleep time limit for sending data in Distributed tables [#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat)) +* Add ability to send profile events (counters) with cumulative values to graphite. It can be enabled under `` in server `config.xml`. [#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat)) +* Add automatically cast type `T` to `LowCardinality(T)` while inserting data in column of type `LowCardinality(T)` in Native format via HTTP. [#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Add ability to use function `hex` without using `reinterpretAsString` for `Float32`, `Float64`. [#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb)) + +#### Build/Testing/Packaging Improvement +* Add gdb-index to clickhouse binary with debug info. It will speed up startup time of `gdb`. [#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin)) +* Speed up deb packaging with patched dpkg-deb which uses `pigz`. [#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin)) +* Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel)) +* Add split build smoke test in CI. [#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin)) +* Add build with MemorySanitizer to CI. [#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm)) +* Replace `libsparsehash` with `sparsehash-c11` [#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat)) + +#### Bug Fix +* Fixed performance degradation of index analysis on complex keys on large tables. This fixes #6924. [#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix logical error causing segfaults when selecting from Kafka empty topic. [#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7)) +* Fix too early MySQL connection close in `MySQLBlockInputStream.cpp`. [#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez)) +* Returned support for very old Linux kernels (fix [#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)) [#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix possible data loss in `insert select` query in case of empty block in input stream. #6834 #6862 [#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix for function `АrrayEnumerateUniqRanked` with empty arrays in params [#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller)) +* Fix complex queries with array joins and global subqueries. 
[#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7)) +* Fix `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs [#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed `MSan` warning while executing function with `LowCardinality` argument. [#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) + +#### Backward Incompatible Change +* Changed serialization format of bitmap* aggregate function states to improve performance. Serialized states of bitmap* from previous versions cannot be read. [#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang)) + +## ClickHouse release 19.14 +### ClickHouse release 19.14.7.15, 2019-10-02 + +#### Bug Fix +* This release also contains all bug fixes from 19.11.12.69. +* Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [#7068](https://github.com/ClickHouse/ClickHouse/issues/7068). [#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.14.6.12, 2019-09-19 + +#### Bug Fix +* Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller)) +* Fixed subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with alias. Use subquery alias for external table name if it is specified. [#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7)) + +#### Build/Testing/Packaging Improvement +* Fix [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it as a shell script, because it needs to wait for mutations to apply. [#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz)) +* Fixed UBSan and MemSan failure in function `groupUniqArray` with empty array argument. It was caused by placing an empty `PaddedPODArray` into the hash table's zero cell, because the constructor for the zero cell value was not called. [#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird)) + +### ClickHouse release 19.14.3.3, 2019-09-10 + +#### New Feature +* `WITH FILL` modifier for `ORDER BY`. (continuation of [#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ)) +* `WITH TIES` modifier for `LIMIT`. (continuation of [#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ)) +* Parse unquoted `NULL` literal as NULL (if setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if data type of this field is not nullable (if setting `input_format_null_as_default=1`). [#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix)) +* Support for wildcards in paths of table functions `file` and `hdfs`. If the path contains wildcards, the table will be readonly.
Example of usage: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`. [#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia)) +* New `system.metric_log` table which stores values of `system.events` and `system.metrics` with specified time interval. [#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Allow to write ClickHouse text logs to `system.text_log` table. [#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Show private symbols in stack traces (this is done via parsing symbol tables of ELF files). Added information about file and line number in stack traces if debug info is present. Sped up symbol name lookup by indexing symbols present in the program. Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed function `symbolizeAddress` to `addressToSymbol` for consistency. Function `addressToSymbol` will return the mangled name for performance reasons, and you have to apply `demangle`. Added setting `allow_introspection_functions` which is turned off by default. [#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Table function `values` (the name is case-insensitive). It allows reading from a `VALUES` list proposed in [#5984](https://github.com/ClickHouse/ClickHouse/issues/5984). Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000)) +* Added the ability to alter storage settings. Syntax: `ALTER TABLE <table> MODIFY SETTING <setting> = <value>`. [#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin)) +* Support for removing detached parts. Syntax: `ALTER TABLE <table> DROP DETACHED PART '<part_name>'`. [#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix)) +* Table constraints. Allow adding a constraint to the table definition which will be checked at insert. [#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Support for cascaded materialized views. [#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird)) +* Turn on query profiler by default to sample every query execution thread once a second. [#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Input format `ORC`.
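+A minimal sketch of loading an ORC file into an existing table (file and table names are hypothetical); ORC is an input format here, so the data is supplied on stdin, for example via `clickhouse-client`:
+```sql
+-- Shell usage (illustrative): clickhouse-client --query="INSERT INTO orc_target FORMAT ORC" < data.orc
+INSERT INTO orc_target FORMAT ORC
+```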
[#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90)) +* Added two new functions: `sigmoid` and `tanh` (that are useful for machine learning applications). [#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Function `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if given token is in haystack. Token is a maximal length substring between two non alphanumeric ASCII characters (or boundaries of haystack). Token must be a constant string. Supported by tokenbf_v1 index specialization. [#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk)) +* New function `neighbor(value, offset[, default_value])`. Allows to reach prev/next value within column in a block of data. [#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov) +* Created a function `currentUser()`, returning login of authorized user. Added alias `user()` for compatibility with MySQL. [#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash)) +* New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive` which were proposed in [#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000)) +* Function `bitmapRange(bitmap, range_begin, range_end)` which returns new set with specified range (not include the `range_end`). [#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang)) +* Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates array of precision-long strings of geohash-boxes covering provided area. [#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk)) +* Implement support for INSERT query with `Kafka` tables. [#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7)) +* Added support for `_partition` and `_timestamp` virtual columns to Kafka engine. [#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7)) +* Possibility to remove sensitive data from `query_log`, server logs, process list with regexp-based rules. [#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov)) + +#### Experimental Feature +* Input and output data format `Template`. It allows to specify custom format string for input and output. 
[#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix)) +* Implementation of `LIVE VIEW` tables that were originally proposed in [#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and then updated in [#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for detailed description. [#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that the `LIVE VIEW` feature may be removed in future versions. + +#### Bug Fix +* This release also contains all bug fixes from 19.13 and 19.11. +* Fix segmentation fault when the table has skip indices and vertical merge happens. [#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin)) +* Fix per-column TTL with non-trivial column defaults. Previously, in case of a force TTL merge with `OPTIMIZE ... FINAL` query, expired values were replaced by type defaults instead of user-specified column defaults. [#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ)) +* Fix Kafka messages duplication problem on normal server restart. [#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7)) +* Fixed infinite loop when reading Kafka messages. Do not pause/resume consumer on subscription at all - otherwise it may get paused indefinitely in some scenarios. [#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7)) +* Fix `Key expression contains comparison between inconvertible types` exception in `bitmapContains` function. [#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000)) +* Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ)) +* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Removed extra verbose logging in MySQL interface [#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Return the ability to parse boolean settings from 'true' and 'false' in the configuration file. [#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin)) +* Fix crash in `quantile` and `median` function over `Nullable(Decimal128)`.
[#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed possible incomplete result returned by `SELECT` query with a `WHERE` condition on a primary key that contained a conversion to Float type. It was caused by incorrect checking of monotonicity in `toFloat` function. [#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000)) +* Check `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014)) +* Fix JOIN results for key columns when used with `join_use_nulls`. Attach Nulls instead of columns defaults. [#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix for skip indices with vertical merge and alter. Fix for `Bad size of marks file` exception. [#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin)) +* Fix rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows) [#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin)) +* Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregate functions. [#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ)) +* Fixed unsafe code around `getIdentifier` function. [#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed bug in MySQL wire protocol (used while connecting to ClickHouse from a MySQL client). Caused by heap buffer overflow in `PacketPayloadWriteBuffer`. [#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuriy Baranov](https://github.com/yurriy)) +* Fixed memory leak in `bitmapSubsetInRange` function. [#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang)) +* Fix rare bug when mutation executed after granularity change. [#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin)) +* Allow protobuf message with all fields by default. [#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar)) +* Resolve a bug with `nullIf` function when we send a `NULL` argument as the second argument. [#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Fix rare bug with wrong memory allocation/deallocation in complex key cache dictionaries with string fields which leads to infinite memory consumption (looks like memory leak). The bug reproduces when the string size is a power of two starting from eight (8, 16, 32, etc.).
[#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin)) +* Fixed Gorilla encoding on small sequences which caused exception `Cannot write after end of buffer`. [#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk)) +* Allow to use not nullable types in JOINs with `join_use_nulls` enabled. [#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2)) +* Disable `Poco::AbstractConfiguration` substitutions in query in `clickhouse-client`. [#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Avoid deadlock in `REPLACE PARTITION`. [#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Using `arrayReduce` for constant arguments may lead to segfault. [#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix inconsistent parts which can appear if replica was restored after `DROP PARTITION`. [#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed hang in `JSONExtractRaw` function. [#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix bug with incorrect skip indices serialization and aggregation with adaptive granularity. [#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin)) +* Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ)) +* Fix bug with writing secondary indices marks with adaptive granularity. [#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin)) +* Fix initialization order while server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()` the `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7)) +* Clearing the data buffer from the previous read operation that was completed with an error. [#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) +* Fix bug with enabling adaptive granularity when creating a new replica for Replicated\*MergeTree table. [#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin)) +* Fixed possible crash during server startup in case of exception happened in `libunwind` during exception at access to uninitialized `ThreadStatus` structure. [#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) +* Fix crash in `yandexConsistentHash` function. Found by fuzz test. 
[#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed the possibility of hanging queries when server is overloaded and global thread pool becomes near full. This has a higher chance of happening on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may reproduce if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed logic of `arrayEnumerateUniqRanked` function. [#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix segfault when decoding symbol table. [#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird)) +* Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to not-Nullable column in case it doesn't contain Nulls (e.g. in query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`. [#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Removed extra quoting of description in `system.settings` table. [#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Avoid possible deadlock in `TRUNCATE` of Replicated table. [#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix reading in order of sorting key. [#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ)) +* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) +* Fix bug opened by [#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0). Reproduces in queries to Distributed tables over MergeTree tables when we don't query any columns (`SELECT 1`). [#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin)) +* Fixed overflow in integer division of signed type to unsigned type. The behaviour was exactly as in C or C++ language (integer promotion rules), which may be surprising. Please note that the overflow is still possible when dividing a large signed number by a large unsigned number or vice-versa (but that case is less usual). The issue existed in all server versions. [#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Limit maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed issues about using `MATERIALIZED` columns and aliases in `MaterializedView`.
[#448](https://github.com/ClickHouse/ClickHouse/issues/448) [#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix `FormatFactory` behaviour for input streams which are not implemented as processor. [#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed typo. [#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin)) +* Typo in the error message ( is -> are ). [#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane)) +* Fixed error while parsing of columns list from string if type contained a comma (this issue was relevant for `File`, `URL`, `HDFS` storages) [#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000)) + +#### Security Fix +* This release also contains all bug security fixes from 19.13 and 19.11. +* Fixed the possibility of a fabricated query to cause server crash due to stack overflow in SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Improvement +* Correct implementation of ternary logic for `AND/OR`. [#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz)) +* Now values and rows with expired TTL will be removed after `OPTIMIZE ... FINAL` query from old parts without TTL infos or with outdated TTL infos, e.g. after `ALTER ... MODIFY TTL` query. Added queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assign merges with TTL and filter expired values in all merges. [#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ)) +* Possibility to change the location of ClickHouse history file for client using `CLICKHOUSE_HISTORY_FILE` env. [#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov)) +* Remove `dry_run` flag from `InterpreterSelectQuery`. ... [#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Support `ASOF JOIN` with `ON` section. [#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2)) +* Better support of skip indexes for mutations and replication. Support for `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indices that use column `x`. [#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0)) +* Allow to `ATTACH` live views (for example, at the server startup) regardless to `allow_experimental_live_view` setting. 
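+A minimal sketch of the live view behaviour described above (view and table names are hypothetical):
+```sql
+-- Creating a LIVE VIEW still requires the experimental setting:
+SET allow_experimental_live_view = 1;
+CREATE LIVE VIEW lv AS SELECT count() FROM hits;
+-- After a server restart the view is attached again even if the setting is not enabled;
+-- WATCH streams updated results as the source table changes:
+WATCH lv;
+```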
[#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* For stack traces gathered by query profiler, do not include stack frames generated by the query profiler itself. [#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Now table functions `values`, `file`, `url`, `hdfs` have support for ALIAS columns. [#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Throw an exception if `config.d` file doesn't have the corresponding root element as the config file. [#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000)) +* Print extra info in exception message for `no space left on device`. [#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix)) +* When determining shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1) ClickHouse now checks conditions from both `prewhere` and `where` clauses of select statement. [#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz)) +* Enabled `SIMDJSON` for machines without AVX2 but with SSE 4.2 and PCLMUL instruction set. [#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Support push down predicate for final subquery. [#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Better `JOIN ON` keys extraction [#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2)) +* Updated `SIMDJSON`. [#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Optimize selecting of smallest column for `SELECT count()` query. [#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird)) +* Added `strict` parameter in `windowFunnel()`. When `strict` is set, `windowFunnel()` applies conditions only to unique values. [#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab)) +* Safer interface of `mysqlxx::Pool`. [#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev)) +* The line width of the output printed with the `--help` option now corresponds to the terminal size. [#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000)) +* Disable "read in order" optimization for aggregation without keys.
[#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ)) +* HTTP status code for `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from default `500 Internal Server Error` to `400 Bad Request`. [#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin)) +* Move Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about `Join` class anymore. Its logic is hidden by `AnalyzedJoin` iface. [#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed possible deadlock of distributed queries when one of shards is localhost but the query is sent via network connection. [#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Changed semantic of multiple tables `RENAME` to avoid possible deadlocks. [#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Rewritten MySQL compatibility server to prevent loading full packet payload in memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy)) +* Move AST alias interpreting logic out of parser that doesn't have to know anything about query semantics. [#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2)) +* Slightly more safe parsing of `NamesAndTypesList`. [#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `clickhouse-copier`: Allow use `where_condition` from config with `partition_key` alias in query for checking partition existence (Earlier it was used only in reading data queries). [#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller)) +* Added optional message argument in `throwIf`. ([#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir)) +* Server exception got while sending insertion data is now being processed in client as well. [#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000)) +* Added a metric `DistributedFilesToInsert` that shows the total number of files in filesystem that are selected to send to remote servers by Distributed tables. The number is summed across all shards. [#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Move most of JOINs prepare logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) 'lock-order-inversion'. 
[#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk)) +* Better information messages about lack of Linux capabilities. Logging fatal errors with "fatal" level, which will make them easier to find in `system.text_log`. [#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* When dumping temporary data to disk to restrict memory usage during `GROUP BY` or `ORDER BY` is enabled, the free disk space was not checked. The fix adds a new setting `min_free_disk_space`: when the free disk space is smaller than this threshold, the query stops and throws `ErrorCodes::NOT_ENOUGH_SPACE`. [#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Removed recursive rwlock by thread. It makes no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold a lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This will lead to false "Attempt to acquire exclusive lock recursively" messages. [#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Split `ExpressionAnalyzer.appendJoin()`. Prepare a place in `ExpressionAnalyzer` for `MergeJoin`. [#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2)) +* Added `mysql_native_password` authentication plugin to MySQL compatibility server. [#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy)) +* Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (insignificant issue). [#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Move `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` makes `required_source_columns` itself now. [#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2)) +* Add setting `joined_subquery_requires_alias` to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs). [#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2)) +* Extract `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2)) +* `system.query_log`: change data type of `type` column to `Enum`. [#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) +* Static linking of `sha256_password` authentication plugin. [#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy)) +* Avoid extra dependency for the setting `compile` to work. In previous versions, the user may get errors like `cannot open crti.o`, `unable to find library -lc` etc. [#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* More validation of the input that may come from malicious replica.
[#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Now `clickhouse-obfuscator` file is available in `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000)) +* Fixed deadlock when we have at least two queries that read at least two tables in different order and another query that performs DDL operation on one of tables. Fixed another very rare deadlock. [#6764](https://github.com/ClickHouse/ClickHouse/pull/6764) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as a default authentication plugin (described in [#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy)) +* Remove unneeded place with changed nullability columns. [#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2)) +* Set default value of `queue_max_wait_ms` to zero, because current value (five seconds) makes no sense. There are rare circumstances when this settings has any use. Added settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Extract `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Keep the last one for non-select queries. [#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2)) +* Removed duplicating input and output formats. [#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Allow user to override `poll_interval` and `idle_connection_timeout` settings on connection. [#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that they dropped completely when all the rows in a part are expired. [#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin)) +* Type checks for set index functions. Throw exception if function got a wrong type. This fixes fuzz test with UBSan. [#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0)) + +#### Performance Improvement +* Optimize queries with `ORDER BY expressions` clause, where `expressions` have coinciding prefix with sorting key in `MergeTree` tables. This optimization is controlled by `optimize_read_in_order` setting. [#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ)) +* Allow to use multiple threads during parts loading and removal. 
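+A minimal sketch of the `ORDER BY` read-in-order optimization mentioned a few items above (table and columns are hypothetical); the table is assumed to be sorted by `(CounterID, EventDate)`, so the `ORDER BY` matches a prefix of the sorting key and can read data already in order instead of performing a full sort:
+```sql
+SELECT CounterID, EventDate
+FROM hits
+ORDER BY CounterID, EventDate
+LIMIT 10
+SETTINGS optimize_read_in_order = 1;
+```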
[#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Implemented batch variant of updating aggregate function states. It may lead to performance benefits. [#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Using `FastOps` library for functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of `exp` and `log` functions more than 6 times. The functions `exp` and `log` from `Float32` argument will return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of `exp` and `log` functions may not be the nearest machine representable number to the true answer. [#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using Danila Kutenin's variant to make fastops work [#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Disable consecutive key optimization for `UInt8/16`. [#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm)) +* Improved performance of `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar)) +* Pre-fault pages when allocating memory with `mmap()`. [#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm)) +* Fix performance bug in `Decimal` comparison. [#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2)) + +#### Build/Testing/Packaging Improvement +* Remove Compiler (runtime template instantiation) because we've won over its performance. [#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added performance test to show degradation of performance in gcc-9 in a more isolated way. [#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added table function `numbers_mt`, which is a multithreaded version of `numbers`. Updated performance tests with hash functions. [#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Comparison mode in `clickhouse-benchmark` [#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000)) +* Best effort for printing stack traces. Also added `SIGPROF` as a debugging signal to print stack trace of a running thread. [#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Every function in its own file, part 10. [#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Remove doubled const `TABLE_IS_READ_ONLY`. 
[#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov)) +* Formatting changes for `StringHashMap` PR [#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm)) +* Better subquery for join creation in `ExpressionAnalyzer`. [#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2)) +* Remove a redundant condition (found by PVS Studio). [#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm)) +* Separate the hash table interface for `ReverseIndex`. [#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm)) +* Refactoring of settings. [#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin)) +* Add comments for `set` index functions. [#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0)) +* Increase OOM score in debug version on Linux. [#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm)) +* HDFS HA now work in debug build. [#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu)) +* Added a test to `transform_query_for_external_database`. [#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add test for multiple materialized views for Kafka table. [#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7)) +* Make a better build scheme. [#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7)) +* Fixed `test_external_dictionaries` integration in case it was executed under non root user. [#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* The bug reproduces when total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy)) +* Added a test for `RENAME` table race condition [#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Avoid data race on Settings in `KILL QUERY`. [#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add integration test for handling errors by a cache dictionary. [#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar)) +* Disable parsing of ELF object files on Mac OS, because it makes no sense. [#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Attempt to make changelog generator better. [#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Adding `-Wshadow` switch to the GCC. [#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg)) +* Removed obsolete code for `mimalloc` support. [#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `zlib-ng` determines x86 capabilities and saves this info to global variables. 
This is done in the `deflateInit` call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it on library startup. [#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm)) +* Regression test for a bug in JOIN which was fixed in [#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev)) +* Fixed MSan report. [#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix flapping TTL test. [#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ)) +* Fixed false data race in `MergeTreeDataPart::is_frozen` field. [#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed timeouts in fuzz test. In the previous version, it managed to find a false hangup in query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added debug checks to `static_cast` of columns. [#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Support for Oracle Linux in official RPM packages. [#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Changed json perftests from `once` to `loop` type. [#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* `odbc-bridge.cpp` defines `main()` so it should not be included in `clickhouse-lib`. [#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej)) +* Test for crash in `FULL|RIGHT JOIN` with nulls in right table's keys. [#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2)) +* Added a test for the limit on expansion of aliases just in case. [#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Switched from `boost::filesystem` to `std::filesystem` where appropriate. [#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added RPM packages to website. [#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add a test for fixed `Unknown identifier` exception in `IN` section. [#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2)) +* Simplify `shared_ptr_helper` because people face difficulties understanding it. [#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added performance tests for fixed Gorilla and DoubleDelta codec. [#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk)) +* Split the integration test `test_dictionaries` into 4 separate tests. [#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fix PVS-Studio warning in `PipelineExecutor`. 
[#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Allow to use `library` dictionary source with ASan. [#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added option to generate changelog from a list of PRs. [#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Lock the `TinyLog` storage when reading. [#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm)) +* Check for broken symlinks in CI. [#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Increase timeout for "stack overflow" test because it may take a long time in debug build. [#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added a check for double whitespaces. [#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix `new/delete` memory tracking when build with sanitizers. Tracking is not clear. It only prevents memory limit exceptions in tests. [#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2)) +* Enable back the check of undefined symbols while linking. [#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7)) +* Avoid rebuilding `hyperscan` every day. [#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed UBSan report in `ProtobufWriter`. [#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Don't allow to use query profiler with sanitizers because it is not compatible. [#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add test for reloading a dictionary after fail by timer. [#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fix inconsistency in `PipelineExecutor::prepareProcessor` argument type. [#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Added a test for bad URIs. [#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added more checks to `CAST` function. This should get more information about segmentation fault in fuzzy test. [#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Added `gcc-9` support to `docker/builder` container that builds image locally. [#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn)) +* Test for primary key with `LowCardinality(String)`. [#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000)) +* Fixed tests affected by slow stack traces printing. [#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add a test case for crash in `groupUniqArray` fixed in [#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). 
[#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm)) +* Fixed indices mutations tests. [#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0)) +* In performance test, do not read query log for queries we didn't run. [#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm)) +* Materialized view now could be created with any low cardinality types regardless to the setting about suspicious low cardinality types. [#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia)) +* Updated tests for `send_logs_level` setting. [#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix build under gcc-8.2. [#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober)) +* Fix build with internal libc++. [#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7)) +* Fix shared build with `rdkafka` library [#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7)) +* Fixes for Mac OS build (incomplete). [#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([alex-zaitsev](https://github.com/alex-zaitsev)) +* Fix "splitted" build. [#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Other build fixes: [#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller)) + +#### Backward Incompatible Change +* Removed rarely used table function `catBoostPool` and storage `CatBoostPool`. If you have used this table function, please write email to `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Disable `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set `any_join_distinct_right_table_keys` setting to enable them. [#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2)) + +## ClickHouse release 19.13 +### ClickHouse release 19.13.6.51, 2019-10-02 + +#### Bug Fix +* This release also contains all bug fixes from 19.11.12.69. + +### ClickHouse release 19.13.5.44, 2019-09-20 + +#### Bug Fix +* This release also contains all bug fixes from 19.14.6.12. +* Fixed possible inconsistent state of table while executing `DROP` query for replicated table while zookeeper is not accessible. 
[#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) +* Fix for data race in StorageMerge [#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix a bug introduced in the query profiler which leads to endless recv from socket. [#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin)) +* Fix excessive CPU usage while executing `JSONExtractRaw` function over a boolean value. [#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fixed a regression when pushing to a materialized view. [#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7)) +* Table function `url` had a vulnerability that allowed an attacker to inject arbitrary HTTP headers in the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix useless `AST` check in Set index. [#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0)) +* Fixed parsing of `AggregateFunction` values embedded in query. [#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang)) +* Fixed wrong behaviour of the `trim` family of functions. [#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.13.4.32, 2019-09-10 + +#### Bug Fix +* This release also contains all bug and security fixes from 19.11.9.52 and 19.11.10.54. +* Fixed data race in `system.parts` table and `ALTER` query. [#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed mismatched header in streams that happened in case of reading from an empty distributed table with sample and prewhere. [#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([Lixiang Qian](https://github.com/fancyqlx)) [#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed crash when using `IN` clause with a subquery with a tuple. [#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) +* Fix case with same column names in `GLOBAL JOIN ON` section. [#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix crash when casting types to `Decimal` that do not support it. Throw exception instead. [#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed crash in `extractAll()` function. [#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2)) +* Query transformation for `MySQL`, `ODBC`, `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. 
[#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000)) +* Added previous declaration checks for MySQL 8 integration. [#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco)) + +#### Security Fix +* Fix two vulnerabilities in codecs in decompression phase (malicious user can fabricate compressed data that will lead to buffer overflow in decompression). [#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2)) + + +### ClickHouse release 19.13.3.26, 2019-08-22 + +#### Bug Fix +* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) +* Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) +* Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed issue with parsing CSV [#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) +* Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happed due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed possible data loss after `ALTER DELETE` query on table with skipping index. [#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0)) + +#### Security Fix +* If the attacker has write access to ZooKeeper and is able to run custom server available from the network where ClickHouse run, it can create custom-built malicious server that will act as ClickHouse replica and register it in ZooKeeper. When another replica will fetch data part from malicious replica, it can force clickhouse-server to write to arbitrary path on filesystem. Found by Eldar Zaitov, information security team at Yandex. [#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.13.2.19, 2019-08-14 + +#### New Feature +* Sampling profiler on query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). 
[#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) +* Allow to specify a list of columns with `COLUMNS('regexp')` expression that works like a more sophisticated variant of the `*` asterisk. [#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `CREATE TABLE AS table_function()` is now possible [#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000)) +* Adam optimizer for stochastic gradient descent is used by default in `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality almost without any tuning. [#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37)) +* Added functions for working with the custom week number [#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([Andy Yang](https://github.com/andyyzh)) +* `RENAME` queries now work with all storages. [#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7)) +* Now the client receives logs from the server at any desired level by setting `send_logs_level`, regardless of the log level specified in the server settings. [#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) + +#### Backward Incompatible Change +* The setting `input_format_defaults_for_omitted_fields` is enabled by default. Inserts in Distributed tables need this setting to be the same on the cluster (you need to set it before a rolling update). It enables calculation of complex default expressions for omitted fields in `JSONEachRow` and `CSV*` formats. It should be the expected behavior but may lead to a negligible performance difference. [#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm)) + +#### Experimental features +* New query processing pipeline. Use `experimental_use_processors=1` option to enable it. Use at your own risk. [#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) + +#### Bug Fix +* Kafka integration has been fixed in this version. +* Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values, improved `DoubleDelta` encoding for random data for `Int32`. [#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk)) +* Fixed overestimation of `max_rows_to_read` if the setting `merge_tree_uniform_read_distribution` is set to 0. [#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Improvement
* Throw an exception if a `config.d` file doesn't have the same root element as the config file [#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000)) + +#### Performance Improvement +* Optimize `count()`. Now it uses the smallest column (if possible). 
[#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird)) + +#### Build/Testing/Packaging Improvement +* Report memory usage in performance tests. [#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm)) +* Fix build with external `libcxx` [#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7)) +* Fix shared build with `rdkafka` library [#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7)) + +## ClickHouse release 19.11 + +### ClickHouse release 19.11.13.74, 2019-11-01 + +#### Bug Fix +* Fixed rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows). [#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin)) +* Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus json diagnostic messages. [#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz)) +* Fixed bug with `mrk` file extension for mutations ([alesapin](https://github.com/alesapin)) + +### ClickHouse release 19.11.12.69, 2019-10-02 + +#### Bug Fix +* Fixed performance degradation of index analysis on complex keys on large tables. This fixes [#6924](https://github.com/ClickHouse/ClickHouse/issues/6924). [#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Avoid rare SIGSEGV while sending data in tables with Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat)) +* Fix `Unknown identifier` with multiple joins. This fixes [#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2)) + +### ClickHouse release 19.11.11.57, 2019-09-13 +* Fix logical error causing segfaults when selecting from an empty Kafka topic. [#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7)) +* Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller)) + +### ClickHouse release 19.11.10.54, 2019-09-10 + +#### Bug Fix +* Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in "one consumer - many partitions" scenario. [#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7)) + +### ClickHouse release 19.11.9.52, 2019-09-6 +* Improve error handling in cache dictionaries. [#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fixed bug in function `arrayEnumerateUniqRanked`. [#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller)) +* Fix `JSONExtract` function while extracting a `Tuple` from JSON. [#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fixed possible data loss after `ALTER DELETE` query on table with skipping index. 
[#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0)) +* Fixed performance test. [#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Parquet: Fix reading boolean columns. [#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed wrong behaviour of `nullIf` function for constant arguments. [#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix Kafka messages duplication problem on normal server restart. [#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7)) +* Fixed an issue when long `ALTER UPDATE` or `ALTER DELETE` may prevent regular merges to run. Prevent mutations from executing if there is no enough free threads available. [#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix)) +* Fixed error with processing "timezone" in server configuration file. [#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix kafka tests. [#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7)) + +#### Security Fix +* If the attacker has write access to ZooKeeper and is able to run custom server available from the network where ClickHouse runs, it can create custom-built malicious server that will act as ClickHouse replica and register it in ZooKeeper. When another replica will fetch data part from malicious replica, it can force clickhouse-server to write to arbitrary path on filesystem. Found by Eldar Zaitov, information security team at Yandex. [#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.11.8.46, 2019-08-22 + +#### Bug Fix +* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) +* Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) +* Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed issue with parsing CSV [#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) +* Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed wrong code in mutations that may lead to memory corruption. 
Fixed segfault with read of address `0x14c0` that may happed due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.11.7.40, 2019-08-14 + +#### Bug fix +* Kafka integration has been fixed in this version. +* Fix segfault when using `arrayReduce` for constant arguments. [#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed `toFloat()` monotonicity. [#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000)) +* Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ)) +* Fixed logic of `arrayEnumerateUniqRanked` function. [#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Removed extra verbose logging from MySQL handler. [#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregated functions. [#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ)) +* Do not expose virtual columns in `system.columns` table. This is required for backward compatibility. [#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix bug with memory allocation for string fields in complex key cache dictionary. [#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin)) +* Fix bug with enabling adaptive granularity when creating new replica for `Replicated*MergeTree` table. [#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin)) +* Fix infinite loop when reading Kafka messages. [#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7)) +* Fixed the possibility of a fabricated query to cause server crash due to stack overflow in SQL parser and possibility of stack overflow in `Merge` and `Distributed` tables [#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed Gorilla encoding error on small sequences. [#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk)) + +#### Improvement +* Allow user to override `poll_interval` and `idle_connection_timeout` settings on connection. [#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.11.5.28, 2019-08-05 + +#### Bug fix +* Fixed the possibility of hanging queries when server is overloaded. [#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix FPE in yandexConsistentHash function. This fixes [#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). 
[#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix parsing of `bool` settings from `true` and `false` strings in configuration files. [#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin)) +* Fix rare bug with incompatible stream headers in queries to `Distributed` table over `MergeTree` table when part of `WHERE` moves to `PREWHERE`. [#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin)) +* Fixed overflow in integer division of signed type to unsigned type. This fixes [#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Backward Incompatible Change +* `Kafka` still broken. + +### ClickHouse release 19.11.4.24, 2019-08-01 + +#### Bug Fix +* Fix bug with writing secondary indices marks with adaptive granularity. [#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin)) +* Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ)) +* Fixed hang in `JSONExtractRaw` function. Fixed [#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix segfault in ExternalLoader::reloadOutdated(). [#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fixed the case when server may close listening sockets but not shutdown and continue serving remaining queries. You may end up with two running clickhouse-server processes. Sometimes, the server may return an error `bad_function_call` for remaining queries. [#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed useless and incorrect condition on update field for initial loading of external dictionaries via ODBC, MySQL, ClickHouse and HTTP. This fixes [#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to not-Nullable column in case if it doesn't contain Nulls (e.g. in query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`. [#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix non-deterministic result of "uniq" aggregate function in extreme rare cases. The bug was present in all ClickHouse versions. [#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Segfault when we set a little bit too high CIDR on the function `IPv6CIDRToRange`. 
[#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Fixed small memory leak when the server throws many exceptions from many different contexts. [#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix the situation when a consumer got paused before subscription and was not resumed afterwards. [#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version. +* Clearing the Kafka data buffer from the previous read operation that was completed with an error [#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) Note that Kafka is broken in this version. +* Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, the `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7)) + +#### Build/Testing/Packaging Improvement +* Added official `rpm` packages. [#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([alesapin](https://github.com/alesapin)) +* Add an ability to build `.rpm` and `.tgz` packages with `packager` script. [#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([alesapin](https://github.com/alesapin)) +* Fixes for "Arcadia" build system. [#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller)) + +#### Backward Incompatible Change +* `Kafka` is broken in this version. + + +### ClickHouse release 19.11.3.11, 2019-07-18 + +#### New Feature +* Added support for prepared statements. [#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Alexander](https://github.com/sanych73)) [#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `DoubleDelta` and `Gorilla` column codecs [#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Vasily Nemkov](https://github.com/Enmk)) +* Added `os_thread_priority` setting that allows to control the "nice" value of query processing threads that is used by the OS to adjust dynamic scheduling priority. It requires `CAP_SYS_NICE` capabilities to work. This implements [#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Implement `_topic`, `_offset`, `_key` columns for Kafka engine [#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version. +* Add aggregate function combinator `-Resample` [#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz)) +* Aggregate functions `groupArrayMovingSum(win_size)(x)` and `groupArrayMovingAvg(win_size)(x)`, which calculate moving sum/avg with or without window-size limitation. [#5595](https://github.com/ClickHouse/ClickHouse/pull/5595) ([inv2004](https://github.com/inv2004)) +* Add synonym `arrayFlatten` <-> `flatten` [#5764](https://github.com/ClickHouse/ClickHouse/pull/5764) ([hcz](https://github.com/hczhcz)) +* Integrate H3 function `geoToH3` from Uber. 
[#4724](https://github.com/ClickHouse/ClickHouse/pull/4724) ([Remen Ivan](https://github.com/BHYCHIK)) [#5805](https://github.com/ClickHouse/ClickHouse/pull/5805) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Bug Fix +* Implement DNS cache with asynchronous update. Separate thread resolves all hosts and updates DNS cache with period (setting `dns_cache_update_period`). It should help, when ip of hosts changes frequently. [#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ)) +* Fix segfault in `Delta` codec which affects columns with values less than 32 bits size. The bug led to random memory corruption. [#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin)) +* Fix segfault in TTL merge with non-physical columns in block. [#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ)) +* Fix rare bug in checking of part with `LowCardinality` column. Previously `checkDataPart` always fails for part with `LowCardinality` column. [#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin)) +* Avoid hanging connections when server thread pool is full. It is important for connections from `remote` table function or connections to a shard without replicas when there is long connection timeout. This fixes [#5878](https://github.com/ClickHouse/ClickHouse/issues/5878) [#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Support for constant arguments to `evalMLModel` function. This fixes [#5817](https://github.com/ClickHouse/ClickHouse/issues/5817) [#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed the issue when ClickHouse determines default time zone as `UCT` instead of `UTC`. This fixes [#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed buffer underflow in `visitParamExtractRaw`. This fixes [#5901](https://github.com/ClickHouse/ClickHouse/issues/5901) [#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on leader replica. [#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin)) +* Fix `coalesce` for `ColumnConst` with `ColumnNullable` + related changes. [#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix the `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before [#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7)) +* Fix `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in right table. [#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuikov](https://github.com/4ertus2)) +* Possible fix of infinite sleeping of low-priority queries. [#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix race condition, which cause that some queries may not appear in query_log after `SYSTEM FLUSH LOGS` query. 
[#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ)) +* Fixed `heap-use-after-free` ASan warning in ClusterCopier caused by watch which try to use already removed copier object. [#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit-tests. [#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Prevent source and intermediate array join columns of masking same name columns. [#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix insert and select query to MySQL engine with MySQL style identifier quoting. [#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014)) +* Now `CHECK TABLE` query can work with MergeTree engine family. It returns check status and message if any for each part (or file in case of simplier engines). Also, fix bug in fetch of a broken part. [#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin)) +* Fix SPLIT_SHARED_LIBRARIES runtime [#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1)) +* Fixed time zone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow` [#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* clickhouse-copier: Fix use-after free on shutdown [#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller)) +* Updated `simdjson`. Fixed the issue that some invalid JSONs with zero bytes successfully parse. [#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix shutdown of SystemLogs [#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ)) +* Fix hanging when condition in invalidate_query depends on a dictionary. [#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar)) + +#### Improvement +* Allow unresolvable addresses in cluster configuration. They will be considered unavailable and tried to resolve at every connection attempt. This is especially useful for Kubernetes. This fixes [#5714](https://github.com/ClickHouse/ClickHouse/issues/5714) [#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Close idle TCP connections (with one hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server can possibly keep a connection pool to every other server, and after peak query concurrency, connections will stall. This fixes [#5879](https://github.com/ClickHouse/ClickHouse/issues/5879) [#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Better quality of `topK` function. Changed the SavingSpace set behavior to remove the last element if the new element have a bigger weight. 
[#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE)) +* URL functions for working with domains can now handle incomplete URLs without a scheme [#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin)) +* Checksums added to the `system.parts_columns` table. [#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) +* Added `Enum` data type as a synonym for `Enum8` or `Enum16`. [#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000)) +* Full bit transpose variant for `T64` codec. Could lead to better compression with `zstd`. [#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuikov](https://github.com/4ertus2)) +* Condition on `startsWith` function can now use the primary key. This fixes [#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [#5882](https://github.com/ClickHouse/ClickHouse/issues/5882) [#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000)) +* Allow to use `clickhouse-copier` with cross-replication cluster topology by permitting an empty database name. [#5745](https://github.com/ClickHouse/ClickHouse/pull/5745) ([nvartolomei](https://github.com/nvartolomei)) +* Use `UTC` as default timezone on a system without `tzdata` (e.g. bare Docker container). Before this patch, the error message `Could not determine local time zone` was printed and the server or client refused to start. [#5827](https://github.com/ClickHouse/ClickHouse/pull/5827) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Returned support for floating point argument in function `quantileTiming` for backward compatibility. [#5911](https://github.com/ClickHouse/ClickHouse/pull/5911) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Show which table is missing a column in error messages. [#5768](https://github.com/ClickHouse/ClickHouse/pull/5768) ([Ivan](https://github.com/abyss7)) +* Disallow running queries with the same query_id by different users [#5430](https://github.com/ClickHouse/ClickHouse/pull/5430) ([proller](https://github.com/proller)) +* More robust code for sending metrics to Graphite. It will work even during long `RENAME TABLE` operations. [#5875](https://github.com/ClickHouse/ClickHouse/pull/5875) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* More informative error messages will be displayed when ThreadPool cannot schedule a task for execution. This fixes [#5305](https://github.com/ClickHouse/ClickHouse/issues/5305) [#5801](https://github.com/ClickHouse/ClickHouse/pull/5801) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Inverting ngramSearch to be more intuitive [#5807](https://github.com/ClickHouse/ClickHouse/pull/5807) ([Danila Kutenin](https://github.com/danlark1)) +* Add user parsing in HDFS engine builder [#5946](https://github.com/ClickHouse/ClickHouse/pull/5946) ([akonyaev90](https://github.com/akonyaev90)) +* Update default value of the `max_ast_elements` parameter [#5933](https://github.com/ClickHouse/ClickHouse/pull/5933) ([Artem Konovalov](https://github.com/izebit)) +* Added a notion of obsolete settings. The obsolete setting `allow_experimental_low_cardinality_type` can be used with no effect. 
[0f15c01c6802f7ce1a1494c12c846be8c98944cd](https://github.com/ClickHouse/ClickHouse/commit/0f15c01c6802f7ce1a1494c12c846be8c98944cd) [Alexey Milovidov](https://github.com/alexey-milovidov) + +#### Performance Improvement +* Increase number of streams to SELECT from Merge table for more uniform distribution of threads. Added setting `max_streams_multiplier_for_merge_tables`. This fixes [#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Build/Testing/Packaging Improvement +* Add a backward compatibility test for client-server interaction with different versions of clickhouse. [#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin)) +* Test coverage information in every commit and pull request. [#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin)) +* Cooperate with address sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of "use-after-free" errors. [#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm)) +* Switch to [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for stack traces printing [#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab)) +* Add two more warnings from -Weverything [#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Allow to build ClickHouse with Memory Sanitizer. [#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed ubsan report about `bitTest` function in fuzz test. [#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Docker: added possibility to init a ClickHouse instance which requires authentication. [#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun)) +* Update librdkafka to version 1.1.0 [#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7)) +* Add global timeout for integration tests and disable some of them in tests code. [#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin)) +* Fix some ThreadSanitizer failures. [#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm)) +* The `--no-undefined` option forces the linker to check all external names for existence while linking. It's very useful to track real dependencies between libraries in the split build mode. [#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7)) +* Added performance test for [#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed compatibility with gcc-7. [#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added support for gcc-9. 
This fixes [#5717](https://github.com/ClickHouse/ClickHouse/issues/5717) [#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error when libunwind can be linked incorrectly. [#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a few warnings found by PVS-Studio. [#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added initial support for `clang-tidy` static analyzer. [#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Convert BSD/Linux endian macros( 'be64toh' and 'htobe64') to the Mac OS X equivalents [#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([Fu Chen](https://github.com/fredchenbj)) +* Improved integration tests guide. [#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Fixing build at macosx + gcc9 [#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov)) +* Fix a hard-to-spot typo: aggreAGte -> aggregate. [#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm)) +* Fix freebsd build [#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller)) +* Add link to experimental YouTube channel to website [#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov)) +* CMake: add option for coverage flags: WITH_COVERAGE [#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller)) +* Fix initial size of some inline PODArray's. [#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm)) +* clickhouse-server.postinst: fix os detection for centos 6 [#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller)) +* Added Arch linux package generation. [#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon)) +* Split Common/config.h by libs (dbms) [#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller)) +* Fixes for "Arcadia" build platform [#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller)) +* Fixes for unconventional build (gcc9, no submodules) [#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller)) +* Require explicit type in unalignedStore because it was proven to be bug-prone [#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm)) +* Fixes MacOS build [#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov)) +* Performance test concerning the new JIT feature with bigger dataset, as requested here [#5263](https://github.com/ClickHouse/ClickHouse/issues/5263) [#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Run stateful tests in stress test [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin)) + +#### Backward Incompatible Change +* `Kafka` is broken in this version. 
+* Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, downgrading to versions prior to 19.6 will be impossible. [#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin)) +* Removed obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write an email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while. [#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov)) + + +## ClickHouse release 19.10 +### ClickHouse release 19.10.1.5, 2019-07-12 + +#### New Feature +* Add new column codec: `T64`. Made for (U)IntX/EnumX/Date(Time)/DecimalX columns. It should be good for columns with constant or small range values. The codec itself allows enlarging or shrinking the data type without re-compression. [#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuikov](https://github.com/4ertus2)) +* Add database engine `MySQL` that allows viewing all the tables in a remote MySQL server [#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014)) +* `bitmapContains` implementation. It's 2x faster than `bitmapHasAny` if the second bitmap contains one element. [#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang)) +* Support for `crc32` function (with behaviour exactly as in MySQL or PHP). Do not use it if you need a hash function. [#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Remen Ivan](https://github.com/BHYCHIK)) +* Implemented `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. [#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014)) + +#### Bug Fix +* Ignore query execution limits and max parts size for merge limits while executing mutations. [#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ)) +* Fix a bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin)) +* Fix function `arrayEnumerateUniqRanked` for arguments with empty arrays [#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller)) +* Don't subscribe to Kafka topics without intent to poll any messages. [#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7)) +* Make setting `join_use_nulls` have no effect for types that cannot be inside Nullable [#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia)) +* Fixed `Incorrect size of index granularity` errors [#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster)) +* Fix Float to Decimal conversion overflow [#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster)) +* Flush buffer when `WriteBufferFromHDFS`'s destructor is called. This fixes writing into `HDFS`.
[#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([Xindong Peng](https://github.com/eejoin)) + +#### Improvement +* Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm)) +* Non-blocking loading of external dictionaries. [#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar)) +* Network timeouts can be dynamically changed for already established connections according to the settings. [#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok)) +* Using "public_suffix_list" for functions `firstSignificantSubdomain`, `cutToFirstSignificantSubdomain`. It's using a perfect hash table generated by `gperf` with a list generated from the file: [https://publicsuffix.org/list/public_suffix_list.dat](https://publicsuffix.org/list/public_suffix_list.dat). (for example, now we recognize the domain `ac.uk` as non-significant). [#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Adopted `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log` [#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Using sessions for connections with MySQL compatibility protocol. #5476 [#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuriy Baranov](https://github.com/yurriy)) +* Support more `ALTER` queries `ON CLUSTER`. [#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundyli](https://github.com/sundy-li)) +* Support `` section in `clickhouse-local` config file. [#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller)) +* Allow run query with `remote` table function in `clickhouse-local` [#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller)) + +#### Performance Improvement +* Add the possibility to write the final mark at the end of MergeTree columns. It allows to avoid useless reads for keys that are out of table data range. It is enabled only if adaptive index granularity is in use. [#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin)) +* Improved performance of MergeTree tables on very slow filesystems by reducing number of `stat` syscalls. [#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes #5631. [#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Build/Testing/Packaging Improvement +* Implemented `TestKeeper` as an implementation of ZooKeeper interface used for testing [#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([levushkin aleksej](https://github.com/alexey-milovidov)) +* From now on `.sql` tests can be run isolated by server, in parallel, with random database. It allows to run them faster, add new tests with custom server configurations, and be sure that different tests doesn't affect each other. 
[#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7)) +* Remove `` and `` from performance tests [#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia)) +* Fixed "select_format" performance test for `Pretty` formats [#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov)) + + +## ClickHouse release 19.9 +### ClickHouse release 19.9.3.31, 2019-07-05 + +#### Bug Fix +* Fix segfault in Delta codec which affected columns with values smaller than 32 bits in size. The bug led to random memory corruption. [#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin)) +* Fix a rare bug in checking of a part with a LowCardinality column. [#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin)) +* Fix segfault in TTL merge with non-physical columns in block. [#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ)) +* Fix potential infinite sleeping of low-priority queries. [#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a bug that caused ClickHouse to determine the default time zone as `UCT` instead of `UTC`. [#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a bug where distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries were executed on a follower replica before the leader replica. Now they will be executed directly on the leader replica. [#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin)) +* Fixed a race condition which caused some queries to not appear in query_log immediately after a SYSTEM FLUSH LOGS query. [#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ)) +* Added missing support for constant arguments to `evalMLModel` function. [#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.9.2.4, 2019-06-24 + +#### New Feature +* Print information about frozen parts in the `system.parts` table. [#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller)) +* Ask for the client password on clickhouse-client start in a tty if it is not set in the arguments [#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller)) +* Implement `dictGet` and `dictGetOrDefault` functions for Decimal types. [#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuikov](https://github.com/4ertus2)) + +#### Improvement +* Debian init: Add service stop timeout [#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller)) +* Added a setting (disabled by default) that allows creating tables with suspicious types for LowCardinality [#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia)) +* Regression functions return model weights when not used as State in function `evalMLMethod`. [#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37)) +* Rename and improve regression methods. [#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37)) +* Clearer interfaces of string searchers.
[#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1)) + +#### Bug Fix +* Fix potential data loss in Kafka [#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7)) +* Fix potential infinite loop in `PrettySpace` format when called with zero columns [#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia)) +* Fixed UInt32 overflow bug in linear models. Allow eval ML model for non-const model argument. [#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if provided index does not exist [#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn)) +* Fix segfault with `bitmapHasAny` in scalar subquery [#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang)) +* Fixed error when replication connection pool doesn't retry to resolve host, even when DNS cache was dropped. [#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin)) +* Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ)) +* Fix INSERT into Distributed table with MATERIALIZED column [#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat)) +* Fix bad alloc when truncate Join storage [#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason)) +* In recent versions of package tzdata some of files are symlinks now. The current mechanism for detecting default timezone gets broken and gives wrong names for some timezones. Now at least we force the timezone name to the contents of TZ if provided. [#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7)) +* Fix some extremely rare cases with MultiVolnitsky searcher when the constant needles in sum are at least 16KB long. The algorithm missed or overwrote the previous results which can lead to the incorrect result of `multiSearchAny`. [#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1)) +* Fix the issue when settings for ExternalData requests couldn't use ClickHouse settings. Also, for now, settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data it can be interpreted as table format and in the query it can be a setting). [#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1)) +* Fix bug when parts were removed only from FS without dropping them from Zookeeper. [#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin)) +* Remove debug logging from MySQL protocol [#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Skip ZNONODE during DDL query processing [#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat)) +* Fix mix `UNION ALL` result column type. There were cases with inconsistent data and column types of resulting columns. 
[#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2)) +* Throw an exception on wrong integers in `dictGetT` functions instead of crash. [#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix wrong element_count and load_factor for hashed dictionary in `system.dictionaries` table. [#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat)) + +#### Build/Testing/Packaging Improvement +* Fixed build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin)) +* Include roaring.h as roaring/roaring.h [#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej)) +* Fix gcc9 warnings in hyperscan (#line directive is evil!) [#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1)) +* Fix all warnings when compiling with gcc-9. Fix some contrib issues. Fix gcc9 ICE and submit it to bugzilla. [#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1)) +* Fixed linking with lld [#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Remove unused specializations in dictionaries [#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuikov](https://github.com/4ertus2)) +* Improvement performance tests for formatting and parsing tables for different types of files [#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia)) +* Fixes for parallel test run [#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller)) +* Docker: use configs from clickhouse-test [#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller)) +* Fix compile for FreeBSD [#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller)) +* Upgrade boost to 1.70 [#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller)) +* Fix build clickhouse as submodule [#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller)) +* Improve JSONExtract performance tests [#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar)) + +## ClickHouse release 19.8 +### ClickHouse release 19.8.3.8, 2019-06-11 + +#### New Features +* Added functions to work with JSON [#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz)) [#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar)) +* Add a function basename, with a similar behaviour to a basename function, which exists in a lot of languages (`os.path.basename` in python, `basename` in PHP, etc...). Work with both an UNIX-like path or a Windows path. [#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set offset of n for LIMIT BY clause. 
[#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ)) +* Added new data type `SimpleAggregateFunction`, which allows having columns with light aggregation in an `AggregatingMergeTree`. This can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea)) +* Added support for non-constant arguments in function `ngramDistance` [#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1)) +* Added functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute sequence skewness, sample skewness, kurtosis and sample kurtosis respectively. [#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz)) +* Support rename operation for `MaterializeView` storage. [#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Added a server which allows connecting to ClickHouse using the MySQL client. [#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuriy Baranov](https://github.com/yurriy)) +* Add `toDecimal*OrZero` and `toDecimal*OrNull` functions. [#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuikov](https://github.com/4ertus2)) +* Support Decimal types in functions: `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, `medianExactWeighted`. [#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2)) +* Added `toValidUTF8` function, which replaces all invalid UTF-8 characters by replacement character � (U+FFFD). [#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1)) +* Added `format` function. It formats a constant pattern (a simplified Python format pattern) with the strings listed in the arguments. [#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1)) +* Added `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm)) +* Added `ngramSearch` function to calculate the non-symmetric difference between needle and haystack. [#5418](https://github.com/ClickHouse/ClickHouse/pull/5418)[#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1)) +* Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. Has different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Also supports mini-batches of custom size. [#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37)) +* Implementation of `geohashEncode` and `geohashDecode` functions. [#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk)) +* Added aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It uses linear interpolation between two sample timestamps and then sums the time series together. Added aggregate function `timeSeriesGroupRateSum`, which calculates the rate of the time series and then sums the rates together.
[#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([Yangkuan Liu](https://github.com/LiuYangkuan)) +* Added functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and higher bounds for an IP in the subnet using a CIDR. [#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE)) +* Add a X-ClickHouse-Summary header when we send a query using HTTP with enabled setting `send_progress_in_http_headers`. Return the usual information of X-ClickHouse-Progress, with additional information like how many rows and bytes were inserted in the query. [#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE)) + +#### Improvements +* Added `max_parts_in_total` setting for MergeTree family of tables (default: 100 000) that prevents unsafe specification of partition key #5166. [#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `clickhouse-obfuscator`: derive seed for individual columns by combining initial seed with column name, not column position. This is intended to transform datasets with multiple related tables, so that tables will remain JOINable after transformation. [#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed functions `jsonExtract` to `JSONExtract`. When something goes wrong these functions return the correspondent values, not `NULL`. Modified function `JSONExtract`, now it gets the return type from its last parameter and doesn't inject nullables. Implemented fallback to RapidJSON in case AVX2 instructions are not available. Simdjson library updated to a new version. [#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar)) +* Now `if` and `multiIf` functions don't rely on the condition's `Nullable`, but rely on the branches for sql compatibility. [#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus)) +* `In` predicate now generates `Null` result from `Null` input like the `Equal` function. [#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus)) +* Check the time limit every (flush_interval / poll_timeout) number of rows from Kafka. This allows to break the reading from Kafka consumer more frequently and to check the time limits for the top-level streams [#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7)) +* Link rdkafka with bundled SASL. It should allow to use SASL SCRAM authentication [#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7)) +* Batched version of RowRefList for ALL JOINS. [#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuikov](https://github.com/4ertus2)) +* clickhouse-server: more informative listen error messages. [#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller)) +* Support dictionaries in clickhouse-copier for functions in `` [#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller)) +* Add new setting `kafka_commit_every_batch` to regulate Kafka committing policy. +It allows to set commit mode: after every batch of messages is handled, or after the whole block is written to the storage. 
It's a trade-off between losing some messages or reading them twice in some extreme situations. [#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7)) +* Make `windowFunnel` support other unsigned integer types. [#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundyli](https://github.com/sundy-li)) +* Allow to shadow virtual column `_table` in Merge engine. [#5325](https://github.com/ClickHouse/ClickHouse/pull/5325) ([Ivan](https://github.com/abyss7)) +* Make `sequenceMatch` aggregate functions support other unsigned integer types [#5339](https://github.com/ClickHouse/ClickHouse/pull/5339) ([sundyli](https://github.com/sundy-li)) +* Better error messages if a checksum mismatch is most likely caused by hardware failures. [#5355](https://github.com/ClickHouse/ClickHouse/pull/5355) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Check that underlying tables support sampling for `StorageMerge` [#5366](https://github.com/ClickHouse/ClickHouse/pull/5366) ([Ivan](https://github.com/abyss7)) +* Close MySQL connections after their usage in external dictionaries. It is related to issue #893. [#5395](https://github.com/ClickHouse/ClickHouse/pull/5395) ([Clément Rodriguez](https://github.com/clemrodriguez)) +* Improvements of MySQL Wire Protocol. Changed the name of the format to MySQLWire. Using RAII for calling RSA_free. Disabling SSL if the context cannot be created. [#5419](https://github.com/ClickHouse/ClickHouse/pull/5419) ([Yuriy Baranov](https://github.com/yurriy)) +* clickhouse-client: allow to run with an inaccessible history file (read-only, no disk space, file is a directory, ...). [#5431](https://github.com/ClickHouse/ClickHouse/pull/5431) ([proller](https://github.com/proller)) +* Respect query settings in asynchronous INSERTs into Distributed tables. [#4936](https://github.com/ClickHouse/ClickHouse/pull/4936) ([TCeason](https://github.com/TCeason)) +* Renamed functions `leastSqr` to `simpleLinearRegression`, `LinearRegression` to `linearRegression`, `LogisticRegression` to `logisticRegression`. [#5391](https://github.com/ClickHouse/ClickHouse/pull/5391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) + +#### Performance Improvements +* Parallelize processing of parts of non-replicated MergeTree tables in ALTER MODIFY query. [#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush)) +* Optimizations in regular expressions extraction. [#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1)) +* Do not add the right join key column to the join result if it's used only in the join on section. [#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2)) +* Freeze the Kafka buffer after the first empty response. It avoids multiple invocations of `ReadBuffer::next()` for empty results in some row-parsing streams. [#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7)) +* `concat` function optimization for multiple arguments. [#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1)) +* Query optimisation. Allow push-down of IN statements while rewriting comma/cross join into an inner one. [#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2)) +* Upgrade our LZ4 implementation with the reference one to have faster decompression.
[#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1)) +* Implemented MSD radix sort (based on kxsort), and partial sorting. [#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty)) + +#### Bug Fixes +* Fix pushing required columns with JOIN [#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014)) +* Fixed a bug where, when ClickHouse is run by systemd, the command `sudo service clickhouse-server forcerestart` did not work as expected. [#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller)) +* Fix http error codes in DataPartsExchange (the interserver http server on port 9009 always returned code 200, even on errors). [#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller)) +* Fix SimpleAggregateFunction for String longer than MAX_SMALL_STRING_SIZE [#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat)) +* Fix error for `Decimal` to `Nullable(Decimal)` conversion in IN. Support other Decimal to Decimal conversions (including different scales). [#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed FPU clobbering in the simdjson library that led to wrong calculation of the `uniqHLL` and `uniqCombined` aggregate functions and math functions such as `log`. [#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed handling mixed const/nonconst cases in JSON functions. [#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fix `retention` function. Now all conditions that are satisfied in a row of data are added to the data state. [#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) +* Fix result type for `quantileExact` with Decimals. [#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2)) + +#### Documentation +* Translate documentation for `CollapsingMergeTree` to Chinese. [#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX)) +* Translate some documentation about table engines to Chinese. + [#5134](https://github.com/ClickHouse/ClickHouse/pull/5134) + [#5328](https://github.com/ClickHouse/ClickHouse/pull/5328) + ([never lee](https://github.com/neverlee)) + + +#### Build/Testing/Packaging Improvements +* Fix some sanitizer reports that show probable use-after-free. [#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7)) +* Move performance tests out of separate directories for convenience. [#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix incorrect performance tests. [#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin)) +* Added a tool to calculate checksums caused by bit flips to debug hardware issues. [#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Make runner script more usable.
[#5340](https://github.com/ClickHouse/ClickHouse/pull/5340)[#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov)) +* Add a short instruction on how to write performance tests. [#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin)) +* Add the ability to make substitutions in create, fill and drop queries in performance tests [#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia)) + +## ClickHouse release 19.7 + +### ClickHouse release 19.7.5.29, 2019-07-05 + +#### Bug Fix +* Fix performance regression in some queries with JOIN. [#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014)) + +### ClickHouse release 19.7.5.27, 2019-06-09 + +#### New Features +* Added bitmap related functions `bitmapHasAny` and `bitmapHasAll` analogous to `hasAny` and `hasAll` functions for arrays. [#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin)) + +#### Bug Fixes +* Fix segfault on `minmax` INDEX with Null value. [#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0)) +* Mark all input columns in LIMIT BY as required output. It fixes 'Not found column' error in some distributed queries. [#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap)) +* Fix "Column '0' already exists" error in `SELECT .. PREWHERE` on column with DEFAULT [#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller)) +* Fix `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ)) +* Don't crash the server when Kafka consumers have failed to start. [#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7)) +* Fixed bitmap functions producing wrong results. [#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh)) +* Fix element_count for hashed dictionary (do not include duplicates) [#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat)) +* Use contents of environment variable TZ as the name for timezone. It helps to correctly detect the default timezone in some cases. [#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7)) +* Do not try to convert integers in `dictGetT` functions, because it doesn't work correctly. Throw an exception instead. [#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix settings in ExternalData HTTP request. [#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila + Kutenin](https://github.com/danlark1)) +* Fix bug when parts were removed only from FS without dropping them from Zookeeper. [#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin)) +* Fix segmentation fault in `bitmapHasAny` function. [#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang)) +* Fixed error when replication connection pool doesn't retry to resolve host, even when DNS cache was dropped. [#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin)) +* Fixed `DROP INDEX IF EXISTS` query.
Now `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn't raise an exception if the provided index does not exist. [#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn)) +* Fix union all supertype column. There were cases with inconsistent data and column types of resulting columns. [#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2)) +* Skip ZNONODE during DDL query processing. Previously, if another node removed the znode in the task queue, the one that +did not process it, but had already got the list of children, would terminate the DDLWorker thread. [#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat)) +* Fix INSERT into Distributed() table with MATERIALIZED column. [#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat)) + +### ClickHouse release 19.7.3.9, 2019-05-30 + +#### New Features +* Allow to limit the range of a setting that can be specified by the user. + These constraints can be set up in the user settings profile. +[#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly +Baranov](https://github.com/vitlibar)) +* Add a second version of the function `groupUniqArray` with an optional + `max_size` parameter that limits the size of the resulting array. This +behavior is similar to the `groupArray(max_size)(x)` function. +[#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume +Tassery](https://github.com/YiuRULE)) +* For TSVWithNames/CSVWithNames input file formats, column order can now be + determined from the file header. This is controlled by the +`input_format_with_names_use_header` parameter. +[#5081](https://github.com/ClickHouse/ClickHouse/pull/5081) +([Alexander](https://github.com/Akazz)) + +#### Bug Fixes +* Crash with uncompressed_cache + JOIN during merge (#5197) +[#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila +Kutenin](https://github.com/danlark1)) +* Segmentation fault on a clickhouse-client query to system tables. #5066 +[#5127](https://github.com/ClickHouse/ClickHouse/pull/5127) +([Ivan](https://github.com/abyss7)) +* Data loss on heavy load via KafkaEngine (#4736) +[#5080](https://github.com/ClickHouse/ClickHouse/pull/5080) +([Ivan](https://github.com/abyss7)) +* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Performance Improvements
+* Use radix sort for sorting by single numeric column in `ORDER BY` without + `LIMIT`. [#5106](https://github.com/ClickHouse/ClickHouse/pull/5106), +[#4439](https://github.com/ClickHouse/ClickHouse/pull/4439) +([Evgenii Pravda](https://github.com/kvinty), +[alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Documentation +* Translate documentation for some table engines to Chinese.
+ [#5107](https://github.com/ClickHouse/ClickHouse/pull/5107), +[#5094](https://github.com/ClickHouse/ClickHouse/pull/5094), +[#5087](https://github.com/ClickHouse/ClickHouse/pull/5087) +([张风啸](https://github.com/AlexZFX)), +[#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never +lee](https://github.com/neverlee)) + +#### Build/Testing/Packaging Improvements +* Print UTF-8 characters properly in `clickhouse-test`. + [#5084](https://github.com/ClickHouse/ClickHouse/pull/5084) +([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add command line parameter for clickhouse-client to always load suggestion + data. [#5102](https://github.com/ClickHouse/ClickHouse/pull/5102) +([alexey-milovidov](https://github.com/alexey-milovidov)) +* Resolve some of PVS-Studio warnings. + [#5082](https://github.com/ClickHouse/ClickHouse/pull/5082) +([alexey-milovidov](https://github.com/alexey-milovidov)) +* Update LZ4 [#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila + Kutenin](https://github.com/danlark1)) +* Add gperf to build requirements for upcoming pull request #5030. + [#5110](https://github.com/ClickHouse/ClickHouse/pull/5110) +([proller](https://github.com/proller)) + +## ClickHouse release 19.6 +### ClickHouse release 19.6.3.18, 2019-06-13 + +#### Bug Fixes +* Fixed IN condition pushdown for queries from table functions `mysql` and `odbc` and corresponding table engines. This fixes #3540 and #2384. [#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix deadlock in Zookeeper. [#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc)) +* Allow quoted decimals in CSV. [#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2) +* Disallow conversion from float Inf/NaN into Decimals (throw exception). [#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix data race in rename query. [#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014)) +* Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP_FAILED in allocating UncompressedCache and in a result to crashes of queries at high loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280)([Danila Kutenin](https://github.com/danlark1)) + +### ClickHouse release 19.6.2.11, 2019-05-13 + +#### New Features +* TTL expressions for columns and tables. [#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ)) +* Added support for `brotli` compression for HTTP responses (Accept-Encoding: br) [#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin)) +* Added new function `isValidUTF8` for checking whether a set of bytes is correctly utf-8 encoded. [#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1)) +* Add new load balancing policy `first_or_random` which sends queries to the first specified host and if it's inaccessible send queries to random hosts of shard. Useful for cross-replication topology setups. [#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei)) + +#### Experimental Features +* Add setting `index_granularity_bytes` (adaptive index granularity) for MergeTree* tables family. 
[#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin)) + +#### Improvements +* Added support for non-constant and negative size and length arguments for function `substringUTF8`. [#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Disable push-down to right table in left join, left table in right join, and both tables in full join. This fixes wrong JOIN results in some cases. [#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7)) +* `clickhouse-copier`: auto upload task configuration from `--task-file` option [#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller)) +* Added a typo handler for the storage factory and the table functions factory. [#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1)) +* Support asterisks and qualified asterisks for multiple joins without subqueries [#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2)) +* Make the missing column error message more user friendly. [#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2)) + +#### Performance Improvements +* Significant speedup of ASOF JOIN [#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy)) + +#### Backward Incompatible Changes +* HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin)) + +#### Bug Fixes +* Fixed potential null pointer dereference in `clickhouse-copier`. [#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller)) +* Fixed error on query with JOIN + ARRAY JOIN [#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar)) +* Partially fix distributed_product_mode = local. It's possible to allow columns of local tables in where/having/order by/... via table aliases. An exception is thrown if a table does not have an alias. It's not possible to access the columns without table aliases yet. [#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix potentially wrong result for `SELECT DISTINCT` with `JOIN` [#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Build/Testing/Packaging Improvements +* Fixed test failures when running clickhouse-server on a different host [#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk)) +* clickhouse-test: Disable color control sequences in a non-tty environment.
[#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin)) +* clickhouse-test: Allow using any test database (remove the `test.` qualification where possible) [#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller)) +* Fix ubsan errors [#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar)) +* Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliably [#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1)) +* Python util to help with backports and changelogs. [#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7)) + + +## ClickHouse release 19.5 +### ClickHouse release 19.5.4.22, 2019-05-13 + +#### Bug Fixes +* Fixed possible crash in bitmap* functions [#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh)) +* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if a LowCardinality column was part of the primary key. #5031 [#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Modification of retention function: If a row satisfies both the first and NTH condition, only the first satisfied condition is added to the data state. Now all conditions that are satisfied in a row of data are added to the data state. [#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) + + +### ClickHouse release 19.5.3.8, 2019-04-18 + +#### Bug Fixes +* Fixed type of setting `max_partitions_per_insert_block` from boolean to UInt64. [#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat)) + + +### ClickHouse release 19.5.2.6, 2019-04-15 + +#### New Features + +* [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`). [#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1)) +* `multiSearchFirstPosition` function was added. [#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1)) +* Implement the predefined expression filter per row for tables. [#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7)) +* A new type of data skipping indices based on bloom filters (can be used for `equal`, `in` and `like` functions). [#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0)) +* Added `ASOF JOIN` which allows running queries that join to the most recent value known.
[#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2)) +* Rewrite multiple `COMMA JOIN` to `CROSS JOIN`. Then rewrite them to `INNER JOIN` if possible. [#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2)) + +#### Improvement + +* `topK` and `topKWeighted` now support a custom `loadFactor` (fixes issue [#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). [#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin)) +* Allow to use `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist)) +* Support for `CREATE OR REPLACE VIEW`. Allow to create a view or set a new definition in a single statement. [#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea)) +* `Buffer` table engine now supports `PREWHERE`. [#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([Yangkuan Liu](https://github.com/LiuYangkuan)) +* Add ability to start a replicated table without metadata in zookeeper in `readonly` mode. [#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin)) +* Fixed flicker of the progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Allow to disable functions with the `hyperscan` library on a per-user basis to limit potentially excessive and uncontrolled resource usage. [#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add version number logging in all errors. [#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller)) +* Added restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added the number of arguments limit to the `multiSearch` functions. [#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1)) +* Improved usage of scratch space and error handling in Hyperscan. [#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1)) +* Fill `system.graphite_detentions` from a table config of `*GraphiteMergeTree` engine tables. [#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid)) +* Rename `trigramDistance` function to `ngramDistance` and add more functions with `CaseInsensitive` and `UTF`. [#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1)) +* Improved data skipping indices calculation. [#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0)) +* Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)).
[#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn)) + +#### Bug Fix + +* Avoid `std::terminate` in case of memory allocation failure. Now the `std::bad_alloc` exception is thrown as expected. [#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixes capnproto reading from buffer. Sometimes files weren't loaded successfully over HTTP. [#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs)) +* Fix error `Unknown log entry type: 0` after `OPTIMIZE TABLE FINAL` query. [#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird)) +* Wrong arguments to `hasAny` or `hasAll` functions could lead to a segfault. [#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* A deadlock could happen while executing a `DROP DATABASE dictionary` query. [#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix undefined behavior in `median` and `quantile` functions. [#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz)) +* Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller)) +* Fixed ignoring of the `UTC` setting (fixes issue [#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller)) +* Fix `histogram` function behaviour with `Distributed` tables. [#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv)) +* Fixed tsan report `destroy of a locked mutex`. [#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed TSan report on shutdown due to race condition in system logs usage. Fixed potential use-after-free on shutdown when part_log is enabled. [#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix rechecking parts in `ReplicatedMergeTreeAlterThread` in case of error. [#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Always backquote column names in metadata. Otherwise it's impossible to create a table with a column named `index` (server won't restart due to malformed `ATTACH` query in metadata). [#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix crash in `ALTER ... MODIFY ORDER BY` on `Distributed` table. [#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason)) +* Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014)) +* Fix bug with adding an extraneous row after consuming a protobuf message from Kafka.
+* Fix crash of `JOIN` on not-nullable vs nullable column. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+* Fixed race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed data race when fetching data part that is already obsolete. [#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed rare data race that can happen during `RENAME` table of MergeTree family. [#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed segmentation fault in function `arrayIntersect`. Segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
+* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable columns. [#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix `No message received` exception while fetching parts between replicas. [#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
+* Fixed `arrayIntersect` function wrong result in case of several repeated values in a single array. [#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
+* Fix incorrect result in `FULL/RIGHT JOIN` with const column. [#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix duplicates in `GLOBAL JOIN` with asterisk. [#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when column type is not specified. [#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
+* Functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
+* Fix rare bug when setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
+* Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix wrong name qualification in `GLOBAL JOIN`. [#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix function `toISOWeek` result for year 1970. [#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix `DROP`, `TRUNCATE` and `OPTIMIZE` queries duplication, when executed `ON CLUSTER` for `ReplicatedMergeTree*` tables family. [#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
+
+#### Backward Incompatible Change
+
+* Rename setting `insert_sample_with_metadata` to setting `input_format_defaults_for_omitted_fields`. [#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2))
+* Added setting `max_partitions_per_insert_block` (with value 100 by default). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended). [#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`); see the example below. [#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
+
+#### Performance Improvement
+
+* Optimize Volnitsky searcher by inlining, giving about 5-10% search improvement for queries with many needles or many similar bigrams. [#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1))
+* Fix performance issue when setting `use_uncompressed_cache` is greater than zero, which appeared when all read data was contained in the cache. [#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin))
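+
+A short sketch of the renamed multi-search functions and the new `max_partitions_per_insert_block` setting from the Backward Incompatible Change list above; the string literals are arbitrary illustrations:
+
+```sql
+-- formerly multiSearch / multiPosition
+SELECT multiSearchAny('hello world', ['world', 'there']);           -- 1 if any needle matches
+SELECT multiSearchAllPositions('hello world', ['hello', 'world']);  -- [1, 7]
+
+-- an INSERT touching more partitions than this fails; 0 removes the limit (not recommended)
+SET max_partitions_per_insert_block = 100;
+```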
+
+#### Build/Testing/Packaging Improvement
+
+* Hardening debug build: more granular memory mappings and ASLR; add memory protection for mark cache and index. This allows to find more memory stomping bugs in case when ASan and MSan cannot do it. [#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add support for cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI` which allows to enable/disable the above features (same as we can do for librdkafka, mysql, etc). [#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp))
+* Add ability to print process list and stacktraces of all threads if some queries are hung after test run. [#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin))
+* Add retries on `Connection loss` error in `clickhouse-test`. [#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin))
+* Add freebsd build with vagrant and build with thread sanitizer to packager script. [#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin))
+* Now the user is asked for a password for the `'default'` user during installation. [#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller))
+* Suppress warning in `rdkafka` library. [#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add ability to build without SSL. [#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller))
+* Add a way to launch clickhouse-server image from a custom user. [#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+* Upgrade contrib boost to 1.69. [#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller))
+* Disable usage of `mremap` when compiled with Thread Sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap`, `munmap`), which leads to false positives. Fixed TSan report in stateful tests. [#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add a test that checks using a format schema via the HTTP interface. [#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar))
+
+## ClickHouse release 19.4
+### ClickHouse release 19.4.4.33, 2019-04-17
+
+#### Bug Fixes
+
+* Avoid `std::terminate` in case of memory allocation failure. Now `std::bad_alloc` exception is thrown as expected. [#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixes capnproto reading from buffer. Sometimes files weren't loaded successfully over HTTP. [#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
+* Fix error `Unknown log entry type: 0` after `OPTIMIZE TABLE FINAL` query. [#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
+* Wrong arguments to `hasAny` or `hasAll` functions may lead to segfault. [#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Deadlock may happen while executing `DROP DATABASE dictionary` query. [#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix undefined behavior in `median` and `quantile` functions. [#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
+* Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
+* Fixed ignoring of the `UTC` setting (fixes issue [#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
+* Fix `histogram` function behaviour with `Distributed` tables. [#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
+* Fixed TSan report `destroy of a locked mutex`. [#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed TSan report on shutdown due to race condition in system logs usage.
Fixed potential use-after-free on shutdown when part_log is enabled. [#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix recheck parts in `ReplicatedMergeTreeAlterThread` in case of error. [#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Always backquote column names in metadata. Otherwise it's impossible to create a table with column named `index` (server won't restart due to malformed `ATTACH` query in metadata). [#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix crash in `ALTER ... MODIFY ORDER BY` on `Distributed` table. [#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason)) +* Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014)) +* Fix bug with adding an extraneous row after consuming a protobuf message from Kafka. [#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar)) +* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) +* Fixed race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed data race when fetching data part that is already obsolete. [#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed rare data race that can happen during `RENAME` table of MergeTree family. [#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed segmentation fault in function `arrayIntersect`. Segmentation fault could happen if function was called with mixed constant and ordinary arguments. [#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx)) +* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix `No message received` exception while fetching parts between replicas. [#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin)) +* Fixed `arrayIntersect` function wrong result in case of several repeated values in single array. [#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when column type is not specified. 
[#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
+* Functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
+* Fix rare bug when setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
+* Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix function `toISOWeek` result for year 1970. [#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix `DROP`, `TRUNCATE` and `OPTIMIZE` queries duplication, when executed `ON CLUSTER` for `ReplicatedMergeTree*` tables family. [#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
+
+#### Improvements
+
+* Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
+
+### ClickHouse release 19.4.3.11, 2019-04-02
+
+#### Bug Fixes
+
+* Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable columns. [#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+
+#### Build/Testing/Packaging Improvement
+
+* Add a way to launch clickhouse-server image from a custom user. [#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+
+### ClickHouse release 19.4.2.7, 2019-03-30
+
+#### Bug Fixes
+* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+### ClickHouse release 19.4.1.3, 2019-03-19
+
+#### Bug Fixes
+* Fixed remote queries which contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used for a remote query, `LIMIT` could happen before `LIMIT BY`, which led to an over-filtered result (see the example below). [#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap))
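+
+A small illustration of the `LIMIT BY` + `LIMIT` interaction the fix above restores; `hits`, `domain`, `path` and `views` are placeholder names:
+
+```sql
+-- LIMIT BY must be applied first (at most 3 rows per domain),
+-- and only then LIMIT caps the total number of returned rows.
+SELECT domain, path, views
+FROM hits
+ORDER BY views DESC
+LIMIT 3 BY domain
+LIMIT 10;
+```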
+
+### ClickHouse release 19.4.0.49, 2019-03-09
+
+#### New Features
+* Added full support for `Protobuf` format (input and output, nested data structures). [#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar))
+* Added bitmap functions with Roaring Bitmaps (see the example below). [#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar))
+* Parquet format support. [#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller))
+* N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in the R language. [#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1))
+* Combine rules for graphite rollup from dedicated aggregation and retention patterns. [#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+* Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added `min_execution_speed_bytes` setting to complement the `min_execution_speed`. [#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014))
+* Implemented function `flatten`. [#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon))
+* Added functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (it's like `arrayEnumerateUniq` but allows to fine tune array depth to look inside multidimensional arrays). [#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Multiple JOINS with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/... [#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2))
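+
+A couple of self-contained queries sketching the new bitmap and `flatten` functions from the list above; the literal values are arbitrary:
+
+```sql
+-- build two Roaring bitmaps, intersect them and count the common elements
+SELECT bitmapCardinality(bitmapAnd(bitmapBuild([1, 2, 3]), bitmapBuild([2, 3, 4])));  -- 2
+
+-- flatten an array of arrays into a single array
+SELECT flatten([[1, 2], [3, 4]]);  -- [1, 2, 3, 4]
+```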
+
+#### Bug Fixes
+* This release also contains all bug fixes from 19.3 and 19.1.
+* Fixed bug in data skipping indices: order of granules after INSERT was incorrect. [#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
+* Fixed `set` index for `Nullable` and `LowCardinality` columns. Before it, `set` index with `Nullable` or `LowCardinality` column led to error `Data type must be deserialized with multiple streams` while selecting. [#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Correctly set update_time on full `executable` dictionary update. [#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
+* Fix broken progress bar in 19.3. [#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
+* Fixed inconsistent values of MemoryTracker when memory region was shrunk, in certain cases. [#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed undefined behaviour in ThreadPool. [#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn))
+* ODBC driver compatibility with `LowCardinality` data type. [#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
+* FreeBSD: Fixup for `AIOcontextPool: Found io_event with unknown id 0` error. [#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
+* `system.part_log` table was created regardless of configuration. [#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix undefined behaviour in `dictIsIn` function for cache dictionaries. [#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
+* Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn))
+* Disable compile_expressions by default until we get own `llvm` contrib and can test it with `clang` and `asan`. [#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin))
+* Prevent `std::terminate` when `invalidate_query` for `clickhouse` external dictionary source has returned wrong resultset (empty or more than one row or more than one column). Fixed issue when the `invalidate_query` was performed every five seconds regardless of the `lifetime`. [#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Avoid deadlock when the `invalidate_query` for a dictionary with `clickhouse` source was involving `system.dictionaries` table or `Dictionaries` database (rare case). [#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixes for CROSS JOIN with empty WHERE. [#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed segfault in function "replicate" when constant argument is passed. [#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix lambda function with predicate optimizer. [#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014))
+* Multiple fixes for multiple JOINs. [#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2))
+
+#### Improvements
+* Support aliases in JOIN ON section for right table columns. [#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2))
+* Result of multiple JOINs needs correct result names to be used in subselects. Replace flat aliases with source names in the result. [#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2))
+* Improve push-down logic for joined statements. [#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7))
+
+#### Performance Improvements
+* Improved heuristics of "move to PREWHERE" optimization. [#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Use proper lookup tables that use HashTable's API for 8-bit and 16-bit keys. [#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird))
+* Improved performance of string comparison.
[#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Cleanup distributed DDL queue in a separate thread so that it doesn't slow down the main loop that processes distributed DDL tasks. [#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alex Zatelepin](https://github.com/ztlpn)) +* When `min_bytes_to_use_direct_io` is set to 1, not every file was opened with O_DIRECT mode because the data size to read was sometimes underestimated by the size of one compressed block. [#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Build/Testing/Packaging Improvement +* Added support for clang-9 [#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix wrong `__asm__` instructions (again) [#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok)) +* Add ability to specify settings for `clickhouse-performance-test` from command line. [#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin)) +* Add dictionaries tests to integration tests. [#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin)) +* Added queries from the benchmark on the website to automated performance tests. [#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and the dependents have to link to it. [#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej)) +* Fixed a case when `quantileTiming` aggregate function can be called with negative or floating point argument (this fixes fuzz test with undefined behaviour sanitizer). [#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Spelling error correction. [#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2)) +* Fix compilation on Mac. [#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar)) +* Build fixes for FreeBSD and various unusual build configurations. [#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller)) + +## ClickHouse release 19.3 +### ClickHouse release 19.3.9.1, 2019-04-02 + +#### Bug Fixes + +* Fix crash in `FULL/RIGHT JOIN` when we joining on nullable vs not nullable. [#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) +* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) + +#### Build/Testing/Packaging Improvement + +* Add a way to launch clickhouse-server image from a custom user [#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)) + + +### ClickHouse release 19.3.7, 2019-03-12 + +#### Bug fixes + +* Fixed error in #3920. This error manifests itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. This bug first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. [#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov)) + + +### ClickHouse release 19.3.6, 2019-03-02 + +#### Bug fixes + +* When there are more than 1000 threads in a thread pool, `std::terminate` may happen on thread exit. [Azat Khuzhin](https://github.com/azat) [#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Now it's possible to create `ReplicatedMergeTree*` tables with comments on columns without defaults and tables with columns codecs without comments and defaults. Also fix comparison of codecs. [#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin)) +* Fixed crash on JOIN with array or tuple. [#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed crash in clickhouse-copier with the message `ThreadStatus not created`. [#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed hangup on server shutdown if distributed DDLs were used. [#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alex Zatelepin](https://github.com/ztlpn)) +* Incorrect column numbers were printed in error message about text format parsing for columns with number greater than 10. [#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Build/Testing/Packaging Improvements + +* Fixed build with AVX enabled. [#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Enable extended accounting and IO accounting based on good known version instead of kernel under which it is compiled. [#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei)) +* Allow to skip setting of core_dump.size_limit, warning instead of throw if limit set fail. [#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller)) +* Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz)) + + +### ClickHouse release 19.3.5, 2019-02-21 + +#### Bug fixes +* Fixed bug with large http insert queries processing. [#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin)) +* Fixed backward incompatibility with old versions due to wrong implementation of `send_logs_level` setting. [#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed backward incompatibility of table function `remote` introduced with column comments. 
[#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.3.4, 2019-02-16 + +#### Improvements +* Table index size is not accounted for memory limits when doing `ATTACH TABLE` query. Avoided the possibility that a table cannot be attached after being detached. [#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Slightly raised up the limit on max string and array size received from ZooKeeper. It allows to continue to work with increased size of `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` on ZooKeeper. [#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Allow to repair abandoned replica even if it already has huge number of nodes in its queue. [#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Add one required argument to `SET` index (max stored rows number). [#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0)) + +#### Bug Fixes +* Fixed `WITH ROLLUP` result for group by single `LowCardinality` key. [#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Fixed bug in the set index (dropping a granule if it contains more than `max_rows` rows). [#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0)) +* A lot of FreeBSD build fixes. [#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller)) +* Fixed aliases substitution in queries with subquery containing same alias (issue [#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2)) + +#### Build/Testing/Packaging Improvements +* Add ability to run `clickhouse-server` for stateless tests in docker image. [#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk)) + +### ClickHouse release 19.3.3, 2019-02-13 + +#### New Features +* Added the `KILL MUTATION` statement that allows removing mutations that are for some reasons stuck. Added `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alex Zatelepin](https://github.com/ztlpn)) +* Added aggregate function `entropy` which computes Shannon entropy. [#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37)) +* Added ability to send queries `INSERT INTO tbl VALUES (....` to server without splitting on `query` and `data` parts. [#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin)) +* Generic implementation of `arrayWithConstant` function was added. [#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Implemented `NOT BETWEEN` comparison operator. [#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed)) +* Implement `sumMapFiltered` in order to be able to limit the number of keys for which values will be summed by `sumMap`. 
[#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
+* Added support of `Nullable` types in `mysql` table function. [#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin))
+* Support for arbitrary constant expressions in `LIMIT` clause. [#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box))
+* Added `topKWeighted` aggregate function that takes an additional argument with (unsigned integer) weight. [#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman))
+* `StorageJoin` now supports `join_any_take_last_row` setting that allows overwriting existing values of the same key. [#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird))
+* Added function `toStartOfInterval`. [#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar))
+* Added `RowBinaryWithNamesAndTypes` format. [#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer))
+* Added `IPv4` and `IPv6` data types. More effective implementations of `IPv*` functions. [#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk))
+* Added function `toStartOfTenMinutes()`. [#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar))
+* Added `Protobuf` output format. [#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar))
+* Added brotli support for HTTP interface for data import (INSERTs). [#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin))
+* Added hints when a user makes a typo in a function name or data type in the command-line client. [#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1))
+* Added `Query-Id` to Server's HTTP Response header. [#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin))
+
+#### Experimental features
+* Added `minmax` and `set` data skipping indices for MergeTree table engines family (see the example below). [#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
+* Added conversion of `CROSS JOIN` to `INNER JOIN` if possible. [#4221](https://github.com/ClickHouse/ClickHouse/pull/4221) [#4266](https://github.com/ClickHouse/ClickHouse/pull/4266) ([Artem Zuikov](https://github.com/4ertus2))
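+
+A minimal sketch of the experimental `minmax` and `set` skipping indices mentioned above; the table and column names are placeholders, and on this release the `allow_experimental_data_skipping_indices` setting may need to be enabled first:
+
+```sql
+SET allow_experimental_data_skipping_indices = 1;
+
+CREATE TABLE events
+(
+    ts DateTime,
+    user_id UInt64,
+    status UInt16,
+    INDEX idx_status status TYPE minmax GRANULARITY 4,
+    INDEX idx_user user_id TYPE set(1000) GRANULARITY 4
+)
+ENGINE = MergeTree
+ORDER BY ts;
+```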
+
+#### Bug Fixes
+* Fixed `Not found column` for duplicate columns in `JOIN ON` section. [#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
+* Make `START REPLICATED SENDS` command start replicated sends. [#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
+* Fixed aggregate functions execution with `Array(LowCardinality)` arguments. [#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
+* Fixed wrong behaviour when doing `INSERT ... SELECT ... FROM file(...)` query and file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed crash on dictionary reload if dictionary not available. This bug appeared in 19.1.6. [#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
+* Fixed `ALL JOIN` with duplicates in right table. [#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed segmentation fault with `use_uncompressed_cache=1` and exception with wrong uncompressed size. This bug appeared in 19.1.6. [#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
+* Fixed `compile_expressions` bug with comparison of big (more than int16) dates. [#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
+* Fixed infinite loop when selecting from table function `numbers(0)`. [#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Temporarily disable predicate optimization for `ORDER BY`. [#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
+* Fixed `Illegal instruction` error when using base64 functions on old CPUs. This error has been reproduced only when ClickHouse was compiled with gcc-8. [#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed `No message received` error when interacting with PostgreSQL ODBC Driver through TLS connection. Also fixes segfault when using MySQL ODBC Driver. [#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed incorrect result when `Date` and `DateTime` arguments are used in branches of conditional operator (function `if`). Added generic case for function `if`. [#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* ClickHouse dictionaries now load within `clickhouse` process. [#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed deadlock when `SELECT` from a table with `File` engine was retried after `No such file or directory` error. [#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed race condition when selecting from `system.tables` may give `table doesn't exist` error. [#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* `clickhouse-client` can segfault on exit while loading data for command line suggestions if it was run in interactive mode. [#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
+* Fixed error: if there is a database with `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with ClickHouse source from localhost, the dictionary cannot load. [#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed an error when system logs were attempted to be created again at server shutdown. [#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Correctly return the right type and properly handle locks in `joinGet` function. [#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
+* Added `sumMapWithOverflow` function. [#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
+* Fixed segfault with `allow_experimental_multiple_joins_emulation`. [52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed bug with incorrect `Date` and `DateTime` comparison. [#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
+* Fixed fuzz test under undefined behavior sanitizer: added parameter type check for `quantile*Weighted` family of functions. [#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed rare race condition when removal of old data parts can fail with `File not found` error. [#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix install package with missing /etc/clickhouse-server/config.xml. [#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
+
+
+#### Build/Testing/Packaging Improvements
+* Debian package: correct /etc/clickhouse-server/preprocessed link according to config. [#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
+* Various build fixes for FreeBSD. [#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller))
+* Added ability to create, fill and drop tables in perftest. [#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin))
+* Added a script to check for duplicate includes. [#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added ability to run queries by index in performance test. [#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin))
+* Package with debug symbols is suggested to be installed. [#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Refactoring of performance-test. Better logging and signals handling. [#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin))
+* Added docs to anonymized Yandex.Metrika datasets. [#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin))
+* Added tool for converting an old month-partitioned part to the custom-partitioned format. [#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn))
+* Added docs about two datasets in s3. [#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin))
+* Added script which creates changelog from pull requests description. [#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai))
+* Added puppet module for ClickHouse. [#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov))
+* Added docs for a group of undocumented functions. [#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014))
+* ARM build fixes. [#4210](https://github.com/ClickHouse/ClickHouse/pull/4210) [#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller))
+* Dictionary tests are now able to run from `ctest`. [#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller))
+* Now `/etc/ssl` is used as default directory with SSL certificates. [#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added checking of SSE and AVX instructions at startup. [#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99))
+* Init script will wait for the server to start. [#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller))
+
+#### Backward Incompatible Changes
+* Removed `allow_experimental_low_cardinality_type` setting. `LowCardinality` data types are production ready. [#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Reduce mark cache size and uncompressed cache size according to the available memory amount. [#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Lopatin Konstantin](https://github.com/k-lopatin))
+* Added keyword `INDEX` in `CREATE TABLE` query. A column with name `index` must be quoted with backticks or double quotes: `` `index` ``. [#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
+* `sumMap` now promotes the result type instead of overflowing. The old `sumMap` behavior can be obtained by using `sumMapWithOverflow` function. [#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
+
+#### Performance Improvements
+* `std::sort` replaced by `pdqsort` for queries without `LIMIT`. [#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty))
+* Now the server reuses threads from the global thread pool. This affects performance in some corner cases. [#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvements
+* Implemented AIO support for FreeBSD. [#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef))
+* `SELECT * FROM a JOIN b USING a, b` now returns `a` and `b` columns only from the left table. [#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2))
+* Allow `-C` option of client to work as `-c` option. [#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey))
+* Now the `--password` option used without a value requires a password from stdin.
[#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD_Conqueror](https://github.com/bsd-conqueror)) +* Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added cancelling of HTTP read only queries if client socket goes away. [#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei)) +* Now server reports progress to keep client connections alive. [#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7)) +* Slightly better message with reason for OPTIMIZE query with `optimize_throw_if_noop` setting enabled. [#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added support of `--version` option for clickhouse server. [#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Lopatin Konstantin](https://github.com/k-lopatin)) +* Added `--help/-h` option to `clickhouse-server`. [#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuriy Baranov](https://github.com/yurriy)) +* Added support for scalar subqueries with aggregate function state result. [#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Improved server shutdown time and ALTERs waiting time. [#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added info about the replicated_can_become_leader setting to system.replicas and add logging if the replica won't try to become leader. [#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn)) + + +## ClickHouse release 19.1 +### ClickHouse release 19.1.14, 2019-03-14 + +* Fixed error `Column ... queried more than once` that may happen if the setting `asterisk_left_columns_only` is set to 1 in case of using `GLOBAL JOIN` with `SELECT *` (rare case). The issue does not exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2)) + +### ClickHouse release 19.1.13, 2019-03-12 + +This release contains exactly the same set of patches as 19.3.7. + +### ClickHouse release 19.1.10, 2019-03-03 + +This release contains exactly the same set of patches as 19.3.6. + + +## ClickHouse release 19.1 +### ClickHouse release 19.1.9, 2019-02-21 + +#### Bug fixes +* Fixed backward incompatibility with old versions due to wrong implementation of `send_logs_level` setting. [#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed backward incompatibility of table function `remote` introduced with column comments. [#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.1.8, 2019-02-16 + +#### Bug Fixes +* Fix install package with missing /etc/clickhouse-server/config.xml. [#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller)) + + +## ClickHouse release 19.1 +### ClickHouse release 19.1.7, 2019-02-15 + +#### Bug Fixes +* Correctly return the right type and properly handle locks in `joinGet` function. 
[#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird)) +* Fixed error when system logs are tried to create again at server shutdown. [#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error: if there is a database with `Dictionary` engine, all dictionaries forced to load at server startup, and if there is a dictionary with ClickHouse source from localhost, the dictionary cannot load. [#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn)) +* `clickhouse-client` can segfault on exit while loading data for command line suggestions if it was run in interactive mode. [#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed race condition when selecting from `system.tables` may give `table doesn't exist` error. [#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed deadlock when `SELECT` from a table with `File` engine was retried after `No such file or directory` error. [#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed an issue: local ClickHouse dictionaries are loaded via TCP, but should load within process. [#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed `No message received` error when interacting with PostgreSQL ODBC Driver through TLS connection. Also fixes segfault when using MySQL ODBC Driver. [#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Temporarily disable predicate optimization for `ORDER BY`. [#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014)) +* Fixed infinite loop when selecting from table function `numbers(0)`. [#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed `compile_expressions` bug with comparison of big (more than int16) dates. [#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin)) +* Fixed segmentation fault with `uncompressed_cache=1` and exception with wrong uncompressed size. [#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin)) +* Fixed `ALL JOIN` with duplicates in right table. [#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed wrong behaviour when doing `INSERT ... SELECT ... FROM file(...)` query and file has `CSVWithNames` or `TSVWIthNames` format and the first data row is missing. [#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed aggregate functions execution with `Array(LowCardinality)` arguments. [#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Debian package: correct /etc/clickhouse-server/preprocessed link according to config. 
[#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller)) +* Fixed fuzz test under undefined behavior sanitizer: added parameter type check for `quantile*Weighted` family of functions. [#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Make `START REPLICATED SENDS` command start replicated sends. [#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei)) +* Fixed `Not found column` for duplicate columns in JOIN ON section. [#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2)) +* Now `/etc/ssl` is used as default directory with SSL certificates. [#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed crash on dictionary reload if dictionary not available. [#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller)) +* Fixed bug with incorrect `Date` and `DateTime` comparison. [#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey)) +* Fixed incorrect result when `Date` and `DateTime` arguments are used in branches of conditional operator (function `if`). Added generic case for function `if`. [#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### ClickHouse release 19.1.6, 2019-01-24 + +#### New Features + +* Custom per column compression codecs for tables. [#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag)) +* Added compression codec `Delta`. [#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) +* Allow to `ALTER` compression codecs. [#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) +* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov)) +* Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) +* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1)) +* Pruning of unused shards if `SELECT` query filters by sharding key (setting `optimize_skip_unused_shards`). [#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7)) +* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7)) +* Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). 
[#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea)) +* Added hashing functions `xxHash64` and `xxHash32`. [#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) +* Added the `gccMurmurHash` hashing function (GCC flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li)) +* Added hashing functions `javaHash`, `hiveHash`. [#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365)) +* Added table function `remoteSecure`. The function works as `remote`, but uses a secure connection. [#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller)) + + +#### Experimental features + +* Added multiple JOINs emulation (`allow_experimental_multiple_joins_emulation` setting). [#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2)) + + +#### Bug Fixes + +* Made the `compiled_expression_cache_size` setting limited by default to lower memory consumption. [#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) +* Fixed a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. [#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fixed a bug where `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fixed a bug with a wrong prefix for IPv4 subnet masks. [#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin)) +* Fixed a crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin)) +* Fixed a leak of netlink sockets. They were placed in a pool where they were never deleted and new sockets were created at the start of a new thread when all current sockets were in use.
[#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fixed a bug with closing the `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking the `odbc-bridge` subprocess. [#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin)) +* Fixed String to UInt monotonic conversion when String is used in the primary key. [#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014)) +* Fixed an error in calculation of integer conversion function monotonicity. [#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a segfault in the `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed UB in StorageMerge. [#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird)) +* Fixed a segfault in functions `addDays`, `subtractDays`. [#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed an error: functions `round`, `floor`, `trunc`, `ceil` may return a bogus result when executed on an integer argument with a large negative scale. [#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a bug induced by 'kill query sync' which led to a core dump. [#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx)) +* Fixed a bug with a long delay after an empty replication queue. [#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin)) +* Fixed excessive memory usage in case of inserting into a table with a `LowCardinality` primary key. [#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed `LowCardinality` serialization for the `Native` format in case of empty arrays. [#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed an incorrect result while using distinct by a single LowCardinality numeric column. [#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed specialized aggregation with a LowCardinality key (in case the `compile` setting is enabled). [#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed user and password forwarding for replicated table queries. [#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu)) +* Fixed a very rare race condition that can happen when listing tables in the Dictionary database while reloading dictionaries. [#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed an incorrect result when HAVING was used with ROLLUP or CUBE.
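To illustrate the `HAVING` with `ROLLUP`/`CUBE` combination covered by the fix above, a minimal sketch against a hypothetical table `sales(region String, amount UInt64)`:

```sql
-- WITH ROLLUP adds subtotal rows for the grouping key;
-- HAVING filters both the regular rows and the subtotal rows.
SELECT region, sum(amount) AS total
FROM sales
GROUP BY region WITH ROLLUP
HAVING total > 100;
```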
[#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection)) +* Fixed column aliases for queries with `JOIN ON` syntax and distributed tables. [#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014)) +* Fixed an error in the internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use the ClickHouse codebase as a library directly. [#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Improvements + +* Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea)) +* Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `CapnProtoInputStream` now supports jagged structures. [#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer)) +* Usability improvement: added a check that the server process is started by the data directory's owner. Do not allow starting the server from root if the data belongs to a non-root user. [#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev)) +* Better logic of checking required columns during analysis of queries with JOINs. [#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2)) +* Decreased the number of connections in case of a large number of Distributed tables in a single server. [#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014)) +* Supported totals row for `WITH TOTALS` query for the ODBC driver. [#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb)) +* Allowed using `Enum`s as integers inside the `if` function. [#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7)) +* Added the `low_cardinality_allow_in_native_format` setting. If disabled, do not use the `LowCardinality` type in the `Native` format. [#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Removed some redundant objects from the compiled expressions cache to lower memory usage. [#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin)) +* Added a check that the `SET send_logs_level = 'value'` query accepts an appropriate value. [#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx)) +* Fixed the data type check in type conversion functions. [#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014)) + +#### Performance Improvements + +* Added a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns).
Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. [#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn)) +* Added a DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case the pattern doesn't contain time. [#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) +* Performance improvement for integer numbers serialization. [#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird)) +* Zero left padding of PODArray so that the -1 element is always valid and zeroed. It's used for branchless calculation of offsets. [#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird)) +* Reverted the `jemalloc` version which led to performance degradation. [#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +#### Backward Incompatible Changes + +* Removed the undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn)) +* Removed function `shardByHash`. [#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Forbid using scalar subqueries with result of type `AggregateFunction`. [#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7)) + +#### Build/Testing/Packaging Improvements + +* Added support for PowerPC (`ppc64le`) build. [#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1)) +* Stateful functional tests are run on a publicly available dataset. [#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed an error when the server could not start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Updated the `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of the raw C interface. [#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7)) +* Updated the `mariadb-client` library. Fixed one of the issues found by UBSan. [#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Some fixes for UBSan builds. [#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added per-commit runs of tests with the UBSan build. +* Added per-commit runs of the PVS-Studio static analyzer. +* Fixed bugs found by PVS-Studio. [#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed glibc compatibility issues.
[#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Moved Docker images to 18.10 and added a compatibility file for glibc >= 2.28. [#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin)) +* Added an env variable for the case when the user doesn't want to chown directories in the server Docker image. [#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin)) +* Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added a few more warnings that are available only in clang 8. [#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej)) +* Added sanitizer variables for test images. [#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin)) +* The `clickhouse-server` debian package will recommend the `libcap2-bin` package to use the `setcap` tool for setting capabilities. This is optional. [#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Improved compilation time, fixed includes. [#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller)) +* Added performance tests for hash functions. [#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov)) +* Fixed cyclic library dependencies. [#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller)) +* Improved compilation with low available memory. [#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller)) +* Added a test script to reproduce performance degradation in `jemalloc`. [#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed misspellings in comments and string literals under `dbms`. [#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) +* Fixed typos in comments. [#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty)) From fea69f4ed3b9da906ff78a666de036d463aac0c2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Mar 2020 20:22:21 +0300 Subject: [PATCH 035/115] Create CHANGELOG_2018.md --- CHANGELOG_2018.md | 1046 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1046 insertions(+) create mode 100644 CHANGELOG_2018.md diff --git a/CHANGELOG_2018.md b/CHANGELOG_2018.md new file mode 100644 index 00000000000..948c161b745 --- /dev/null +++ b/CHANGELOG_2018.md @@ -0,0 +1,1046 @@ + + +## ClickHouse release 18.16 +### ClickHouse release 18.16.1, 2018-12-21 + +#### Bug fixes: + +* Fixed an error that led to problems with updating dictionaries with the ODBC source. [#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) +* JIT compilation of aggregate functions now works with LowCardinality columns.
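A minimal, illustrative sketch of the `LowCardinality` type referenced in the entry above (the table and column names are hypothetical):

```sql
-- LowCardinality keeps a dictionary of distinct values per part,
-- so string-heavy columns with few distinct values aggregate faster.
CREATE TABLE visits (user_agent LowCardinality(String), hits UInt32)
ENGINE = MergeTree ORDER BY user_agent;

SELECT user_agent, sum(hits) FROM visits GROUP BY user_agent;
```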
[#3838](https://github.com/ClickHouse/ClickHouse/issues/3838) + +#### Improvements: + +* Added the `low_cardinality_allow_in_native_format` setting (enabled by default). When disabled, LowCardinality columns will be converted to ordinary columns for SELECT queries and ordinary columns will be expected for INSERT queries. [#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) + +#### Build improvements: + +* Fixes for builds on macOS and ARM. + +### ClickHouse release 18.16.0, 2018-12-14 + +#### New features: + +* `DEFAULT` expressions are evaluated for missing fields when loading data in semi-structured input formats (`JSONEachRow`, `TSKV`). The feature is enabled with the `insert_sample_with_metadata` setting. [#3555](https://github.com/ClickHouse/ClickHouse/pull/3555) +* The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column. This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [#3755](https://github.com/ClickHouse/ClickHouse/pull/3755) +* For tables in the `MergeTree` family, now you can specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). The sorting key can be longer than the index. [#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) +* Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/3617) +* Added functions for working with base64: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3350) +* Now you can use a parameter to configure the precision of the `uniqCombined` aggregate function (select the number of HyperLogLog cells). [#3406](https://github.com/ClickHouse/ClickHouse/pull/3406) +* Added the `system.contributors` table that contains the names of everyone who made commits in ClickHouse. [#3452](https://github.com/ClickHouse/ClickHouse/pull/3452) +* Added the ability to omit the partition for the `ALTER TABLE ... FREEZE` query in order to back up all partitions at once. [#3514](https://github.com/ClickHouse/ClickHouse/pull/3514) +* Added `dictGet` and `dictGetOrDefault` functions that don't require specifying the type of return value. The type is determined automatically from the dictionary description. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3564) +* Now you can specify comments for a column in the table description and change it using `ALTER`. [#3377](https://github.com/ClickHouse/ClickHouse/pull/3377) +* Reading is supported for `Join` type tables with simple keys. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) +* Now you can specify the options `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`, and `join_overflow_mode` when creating a `Join` type table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) +* Added the `joinGet` function that allows you to use a `Join` type table like a dictionary. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) +* Added the `partition_key`, `sorting_key`, `primary_key`, and `sampling_key` columns to the `system.tables` table in order to provide information about table keys. 
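A short, illustrative query over the new key columns in `system.tables` described in the entry above (the database name is hypothetical):

```sql
-- partition_key, sorting_key, primary_key and sampling_key expose
-- the corresponding key expressions of each table as text.
SELECT name, partition_key, sorting_key, primary_key, sampling_key
FROM system.tables
WHERE database = 'default';
```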
[#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) +* Added the `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`, and `is_in_sampling_key` columns to the `system.columns` table. [#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) +* Added the `min_time` and `max_time` columns to the `system.parts` table. These columns are populated when the partitioning key is an expression consisting of `DateTime` columns. [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800) + +#### Bug fixes: + +* Fixes and performance improvements for the `LowCardinality` data type. `GROUP BY` using `LowCardinality(Nullable(...))`. Getting the values of `extremes`. Processing high-order functions. `LEFT ARRAY JOIN`. Distributed `GROUP BY`. Functions that return `Array`. Execution of `ORDER BY`. Writing to `Distributed` tables (nicelulu). Backward compatibility for `INSERT` queries from old clients that implement the `Native` protocol. Support for `LowCardinality` for `JOIN`. Improved performance when working in a single stream. [#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [#3518](https://github.com/ClickHouse/ClickHouse/pull/3518) +* Fixed how the `select_sequential_consistency` option works. Previously, when this setting was enabled, an incomplete result was sometimes returned after beginning to write to a new partition. [#2863](https://github.com/ClickHouse/ClickHouse/pull/2863) +* Databases are correctly specified when executing DDL `ON CLUSTER` queries and `ALTER UPDATE/DELETE`. [#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) +* Databases are correctly specified for subqueries inside a VIEW. [#3521](https://github.com/ClickHouse/ClickHouse/pull/3521) +* Fixed a bug in `PREWHERE` with `FINAL` for `VersionedCollapsingMergeTree`. [7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1) +* Now you can use `KILL QUERY` to cancel queries that have not started yet because they are waiting for the table to be locked. [#3517](https://github.com/ClickHouse/ClickHouse/pull/3517) +* Corrected date and time calculations if the clocks were moved back at midnight (this happens in Iran, and happened in Moscow from 1981 to 1983). Previously, this led to the time being reset a day earlier than necessary, and also caused incorrect formatting of the date and time in text format. [#3819](https://github.com/ClickHouse/ClickHouse/pull/3819) +* Fixed bugs in some cases of `VIEW` and subqueries that omit the database. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3521) +* Fixed a race condition when simultaneously reading from a `MATERIALIZED VIEW` and deleting a `MATERIALIZED VIEW` due to not locking the internal `MATERIALIZED VIEW`. 
[#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [#3694](https://github.com/ClickHouse/ClickHouse/pull/3694) +* Fixed the error `Lock handler cannot be nullptr.` [#3689](https://github.com/ClickHouse/ClickHouse/pull/3689) +* Fixed query processing when the `compile_expressions` option is enabled (it's enabled by default). Nondeterministic constant expressions like the `now` function are no longer unfolded. [#3457](https://github.com/ClickHouse/ClickHouse/pull/3457) +* Fixed a crash when specifying a non-constant scale argument in `toDecimal32/64/128` functions. +* Fixed an error when trying to insert an array with `NULL` elements in the `Values` format into a column of type `Array` without `Nullable` (if `input_format_values_interpret_expressions` = 1). [#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [#3503](https://github.com/ClickHouse/ClickHouse/pull/3503) +* Fixed continuous error logging in `DDLWorker` if ZooKeeper is not available. [8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2) +* Fixed the return type for `quantile*` functions from `Date` and `DateTime` types of arguments. [#3580](https://github.com/ClickHouse/ClickHouse/pull/3580) +* Fixed the `WITH` clause if it specifies a simple alias without expressions. [#3570](https://github.com/ClickHouse/ClickHouse/pull/3570) +* Fixed processing of queries with named sub-queries and qualified column names when `enable_optimize_predicate_expression` is enabled. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3588) +* Fixed the error `Attempt to attach to nullptr thread group` when working with materialized views. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623) +* Fixed a crash when passing certain incorrect arguments to the `arrayReverse` function. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) +* Fixed the buffer overflow in the `extractURLParameter` function. Improved performance. Added correct processing of strings containing zero bytes. [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5) +* Fixed buffer overflow in the `lowerUTF8` and `upperUTF8` functions. Removed the ability to execute these functions over `FixedString` type arguments. [#3662](https://github.com/ClickHouse/ClickHouse/pull/3662) +* Fixed a rare race condition when deleting `MergeTree` tables. [#3680](https://github.com/ClickHouse/ClickHouse/pull/3680) +* Fixed a race condition when reading from `Buffer` tables and simultaneously performing `ALTER` or `DROP` on the target tables. [#3719](https://github.com/ClickHouse/ClickHouse/pull/3719) +* Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) + +#### Improvements: + +* The server does not write the processed configuration files to the `/etc/clickhouse-server/` directory. Instead, it saves them in the `preprocessed_configs` directory inside `path`. This means that the `/etc/clickhouse-server/` directory doesn't have write access for the `clickhouse` user, which improves security. [#2443](https://github.com/ClickHouse/ClickHouse/pull/2443) +* The `min_merge_bytes_to_use_direct_io` option is set to 10 GiB by default. A merge that forms large parts of tables from the MergeTree family will be performed in `O_DIRECT` mode, which prevents excessive page cache eviction. 
[#3504](https://github.com/ClickHouse/ClickHouse/pull/3504) +* Accelerated server start when there is a very large number of tables. [#3398](https://github.com/ClickHouse/ClickHouse/pull/3398) +* Added a connection pool and HTTP `Keep-Alive` for connections between replicas. [#3594](https://github.com/ClickHouse/ClickHouse/pull/3594) +* If the query syntax is invalid, the `400 Bad Request` code is returned in the `HTTP` interface (500 was returned previously). [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab) +* The `join_default_strictness` option is set to `ALL` by default for compatibility. [120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe) +* Removed logging to `stderr` from the `re2` library for invalid or complex regular expressions. [#3723](https://github.com/ClickHouse/ClickHouse/pull/3723) +* Added for the `Kafka` table engine: checks for subscriptions before beginning to read from Kafka; the kafka_max_block_size setting for the table. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396) +* The `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`, and `murmurHash3_64` functions now work for any number of arguments and for arguments in the form of tuples. [#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [#3519](https://github.com/ClickHouse/ClickHouse/pull/3519) +* The `arrayReverse` function now works with any types of arrays. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) +* Added an optional parameter: the slot size for the `timeSlots` function. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3724) +* For `FULL` and `RIGHT JOIN`, the `max_block_size` setting is used for a stream of non-joined data from the right table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3699) +* Added the `--secure` command line parameter in `clickhouse-benchmark` and `clickhouse-performance-test` to enable TLS. [#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [#3690](https://github.com/ClickHouse/ClickHouse/pull/3690) +* Type conversion when the structure of a `Buffer` type table does not match the structure of the destination table. [Vitaly Baranov](https://github.com/ClickHouse/ClickHouse/pull/3603) +* Added the `tcp_keep_alive_timeout` option to enable keep-alive packets after inactivity for the specified time interval. [#3441](https://github.com/ClickHouse/ClickHouse/pull/3441) +* Removed unnecessary quoting of values for the partition key in the `system.parts` table if it consists of a single column. [#3652](https://github.com/ClickHouse/ClickHouse/pull/3652) +* The modulo function works for `Date` and `DateTime` data types. [#3385](https://github.com/ClickHouse/ClickHouse/pull/3385) +* Added synonyms for the `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`, and `MID` functions. [#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) Some function names are case-insensitive for compatibility with the SQL standard. Added syntactic sugar `SUBSTRING(expr FROM start FOR length)` for compatibility with SQL. [#3804](https://github.com/ClickHouse/ClickHouse/pull/3804) +* Added the ability to `mlock` memory pages corresponding to `clickhouse-server` executable code to prevent it from being forced out of memory. 
This feature is disabled by default. [#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) +* Improved performance when reading from `O_DIRECT` (with the `min_bytes_to_use_direct_io` option enabled). [#3405](https://github.com/ClickHouse/ClickHouse/pull/3405) +* Improved performance of the `dictGet...OrDefault` function for a constant key argument and a non-constant default argument. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3563) +* The `firstSignificantSubdomain` function now processes the domains `gov`, `mil`, and `edu`. [Igor Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) Improved performance. [#3628](https://github.com/ClickHouse/ClickHouse/pull/3628) +* Ability to specify custom environment variables for starting `clickhouse-server` using the `SYS-V init.d` script by defining `CLICKHOUSE_PROGRAM_ENV` in `/etc/default/clickhouse`. +[Pavlo Bashynskyi](https://github.com/ClickHouse/ClickHouse/pull/3612) +* Correct return code for the clickhouse-server init script. [#3516](https://github.com/ClickHouse/ClickHouse/pull/3516) +* The `system.metrics` table now has the `VersionInteger` metric, and `system.build_options` has the added line `VERSION_INTEGER`, which contains the numeric form of the ClickHouse version, such as `18016000`. [#3644](https://github.com/ClickHouse/ClickHouse/pull/3644) +* Removed the ability to compare the `Date` type with a number to avoid potential errors like `date = 2018-12-17`, where quotes around the date are omitted by mistake. [#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) +* Fixed the behavior of stateful functions like `rowNumberInAllBlocks`. They previously output a result that was one number larger due to starting during query analysis. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3729) +* If the `force_restore_data` file can't be deleted, an error message is displayed. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3794) + +#### Build improvements: + +* Updated the `jemalloc` library, which fixes a potential memory leak. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3557) +* Profiling with `jemalloc` is enabled by default in order to debug builds. [2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15) +* Added the ability to run integration tests when only `Docker` is installed on the system. [#3650](https://github.com/ClickHouse/ClickHouse/pull/3650) +* Added the fuzz expression test in SELECT queries. [#3442](https://github.com/ClickHouse/ClickHouse/pull/3442) +* Added a stress test for commits, which performs functional tests in parallel and in random order to detect more race conditions. [#3438](https://github.com/ClickHouse/ClickHouse/pull/3438) +* Improved the method for starting clickhouse-server in a Docker image. [Elghazal Ahmed](https://github.com/ClickHouse/ClickHouse/pull/3663) +* For a Docker image, added support for initializing databases using files in the `/docker-entrypoint-initdb.d` directory. [Konstantin Lebedev](https://github.com/ClickHouse/ClickHouse/pull/3695) +* Fixes for builds on ARM. [#3709](https://github.com/ClickHouse/ClickHouse/pull/3709) + +#### Backward incompatible changes: + +* Removed the ability to compare the `Date` type with a number. 
Instead of `toDate('2018-12-18') = 17883`, you must use explicit type conversion `= toDate(17883)` [#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) + +## ClickHouse release 18.14 +### ClickHouse release 18.14.19, 2018-12-19 + +#### Bug fixes: + +* Fixed an error that led to problems with updating dictionaries with the ODBC source. [#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) +* Databases are correctly specified when executing DDL `ON CLUSTER` queries. [#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) +* Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) + +#### Build improvements: + +* Fixes for builds on ARM. + +### ClickHouse release 18.14.18, 2018-12-04 + +#### Bug fixes: +* Fixed an error in the `dictGet...` function for dictionaries of type `range`, if one of the arguments is constant and the other is not. [#3751](https://github.com/ClickHouse/ClickHouse/pull/3751) +* Fixed an error that caused messages `netlink: '...': attribute type 1 has an invalid length` to be printed in the Linux kernel log; this was happening only on recent enough versions of the Linux kernel. [#3749](https://github.com/ClickHouse/ClickHouse/pull/3749) +* Fixed a segfault in function `empty` for an argument of `FixedString` type. [Daniel, Dao Quang Minh](https://github.com/ClickHouse/ClickHouse/pull/3703) +* Fixed excessive memory allocation when using a large value of the `max_query_size` setting (a memory chunk of `max_query_size` bytes was preallocated at once). [#3720](https://github.com/ClickHouse/ClickHouse/pull/3720) + +#### Build changes: +* Fixed the build with LLVM/Clang libraries of version 7 from the OS packages (these libraries are used for runtime query compilation). [#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) + +### ClickHouse release 18.14.17, 2018-11-30 + +#### Bug fixes: +* Fixed cases when the ODBC bridge process did not terminate with the main server process. [#3642](https://github.com/ClickHouse/ClickHouse/pull/3642) +* Fixed synchronous insertion into the `Distributed` table with a column list that differs from the column list of the remote table. [#3673](https://github.com/ClickHouse/ClickHouse/pull/3673) +* Fixed a rare race condition that can lead to a crash when dropping a MergeTree table. [#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) +* Fixed a query deadlock in the case when query thread creation fails with the `Resource temporarily unavailable` error. [#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) +* Fixed parsing of the `ENGINE` clause when the `CREATE AS table` syntax was used and the `ENGINE` clause was specified before the `AS table` (the error resulted in ignoring the specified engine). [#3692](https://github.com/ClickHouse/ClickHouse/pull/3692) + +### ClickHouse release 18.14.15, 2018-11-21 + +#### Bug fixes: +* The size of a memory chunk was overestimated while deserializing a column of type `Array(String)`, which led to "Memory limit exceeded" errors. The issue appeared in version 18.12.13. [#3589](https://github.com/ClickHouse/ClickHouse/issues/3589) + +### ClickHouse release 18.14.14, 2018-11-20 + +#### Bug fixes: +* Fixed `ON CLUSTER` queries when the cluster is configured as secure (the `<secure>` flag).
[#3599](https://github.com/ClickHouse/ClickHouse/pull/3599) + +#### Build changes: +* Fixed build problems (llvm-7 from the system, macOS). [#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) + +### ClickHouse release 18.14.13, 2018-11-08 + +#### Bug fixes: +* Fixed the `Block structure mismatch in MergingSorted stream` error. [#3162](https://github.com/ClickHouse/ClickHouse/issues/3162) +* Fixed `ON CLUSTER` queries in the case when secure connections were turned on in the cluster config (the `<secure>` flag). [#3465](https://github.com/ClickHouse/ClickHouse/pull/3465) +* Fixed an error in queries that used `SAMPLE`, `PREWHERE` and alias columns. [#3543](https://github.com/ClickHouse/ClickHouse/pull/3543) +* Fixed a rare `unknown compression method` error when the `min_bytes_to_use_direct_io` setting was enabled. [#3544](https://github.com/ClickHouse/ClickHouse/pull/3544) + +#### Performance improvements: +* Fixed a performance regression of queries with `GROUP BY` of columns of UInt16 or Date type when executing on AMD EPYC processors. [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512) +* Fixed a performance regression of queries that process long strings. [#3530](https://github.com/ClickHouse/ClickHouse/pull/3530) + +#### Build improvements: +* Improvements for simplifying the Arcadia build. [#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [#3535](https://github.com/ClickHouse/ClickHouse/pull/3535) + +### ClickHouse release 18.14.12, 2018-11-02 + +#### Bug fixes: + +* Fixed a crash on joining two unnamed subqueries. [#3505](https://github.com/ClickHouse/ClickHouse/pull/3505) +* Fixed generating incorrect queries (with an empty `WHERE` clause) when querying external databases. [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477) +* Fixed using an incorrect timeout value in ODBC dictionaries. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511) + +### ClickHouse release 18.14.11, 2018-10-29 + +#### Bug fixes: + +* Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [#2156](https://github.com/ClickHouse/ClickHouse/issues/2156) +* Fixed errors when merging data in tables containing arrays inside Nested structures. [#3397](https://github.com/ClickHouse/ClickHouse/pull/3397) +* Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [#3429](https://github.com/ClickHouse/ClickHouse/pull/3429) +* Fixed an error on inserts to a Distributed table in Native format. [#3411](https://github.com/ClickHouse/ClickHouse/issues/3411) + +### ClickHouse release 18.14.10, 2018-10-23 + +* The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [#3410](https://github.com/ClickHouse/ClickHouse/pull/3410) +* The `enable_optimize_predicate_expression` setting is disabled by default. + +### ClickHouse release 18.14.9, 2018-10-16 + +#### New features: + +* The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available). [#3172](https://github.com/ClickHouse/ClickHouse/pull/3172) +* Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770) +* Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) +* Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`.
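A minimal sketch of the date helpers and the `WITH CUBE` modifier listed for 18.14.9 above (the table in the second query is hypothetical):

```sql
-- ISO week helpers and custom date formatting.
SELECT toISOWeek(today()) AS iso_week,
       toISOYear(today()) AS iso_year,
       formatDateTime(now(), '%Y-%m-%d %H:%M:%S') AS formatted;

-- WITH CUBE adds subtotal rows for every combination of the grouping keys.
SELECT region, product, count() AS orders
FROM orders_local
GROUP BY region, product WITH CUBE;
```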
[#3146](https://github.com/ClickHouse/ClickHouse/pull/3146) +* Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) +* Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/ClickHouse/ClickHouse/pull/3144) +* Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208) +* Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`). [#3321](https://github.com/ClickHouse/ClickHouse/pull/3321) +* Now you can use pre-defined `database` and `table` macros when declaring `Replicated` tables. [#3251](https://github.com/ClickHouse/ClickHouse/pull/3251) +* Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). [#3153](https://github.com/ClickHouse/ClickHouse/pull/3153) + +#### Experimental features: + +* Optimization of the GROUP BY clause for `LowCardinality` data types. [#3138](https://github.com/ClickHouse/ClickHouse/pull/3138) +* Optimized calculation of expressions for `LowCardinality` data types. [#3200](https://github.com/ClickHouse/ClickHouse/pull/3200) + +#### Improvements: + +* Significantly reduced memory consumption for queries with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) +* In the absence of `JOIN` (`LEFT`, `INNER`, ...), `INNER JOIN` is assumed. [#3147](https://github.com/ClickHouse/ClickHouse/pull/3147) +* Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3202) +* The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) +* The `compile_expressions` setting (JIT compilation of expressions) is enabled by default. +* Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [#3101](https://github.com/ClickHouse/ClickHouse/pull/3101) +* LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) +* Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) +* Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3257). +* `ON CLUSTER` can be specified for `ALTER UPDATE` queries. +* Improved performance for reading data in `JSONEachRow` format. [#3332](https://github.com/ClickHouse/ClickHouse/pull/3332) +* Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive.
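The compatibility synonyms from the entry above, in a short illustrative query:

```sql
-- LENGTH and CHARACTER_LENGTH are synonyms added for SQL compatibility;
-- CONCAT can now be written in any letter case.
SELECT LENGTH('ClickHouse') AS len,
       CHARACTER_LENGTH('ClickHouse') AS char_len,
       CONCAT('Click', 'House') AS merged;
```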
[#3306](https://github.com/ClickHouse/ClickHouse/pull/3306) +* Added the `TIMESTAMP` synonym for the `DateTime` type. [#3390](https://github.com/ClickHouse/ClickHouse/pull/3390) +* There is always space reserved for query_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools. +* Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) +* Added compatibility mode for the case when the client library that uses the Native protocol sends fewer columns by mistake than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [#3171](https://github.com/ClickHouse/ClickHouse/pull/3171) +* In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [#3166](https://github.com/ClickHouse/ClickHouse/pull/3166) +* The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). +* Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto format`. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216) +* The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219) +* In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [#3330](https://github.com/ClickHouse/ClickHouse/pull/3330) +* Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264) +* Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [#3281](https://github.com/ClickHouse/ClickHouse/pull/3281) +* Support for the `Decimal` data type in external dictionaries. [#3324](https://github.com/ClickHouse/ClickHouse/pull/3324) +* Support for the `Decimal` data type in `SummingMergeTree` tables. [#3348](https://github.com/ClickHouse/ClickHouse/pull/3348) +* Added specializations for `UUID` in `if`. [#3366](https://github.com/ClickHouse/ClickHouse/pull/3366) +* Reduced the number of `open` and `close` system calls when reading from a `MergeTree table`. [#3283](https://github.com/ClickHouse/ClickHouse/pull/3283) +* A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3375) + +#### Bug fixes: + +* Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [#1702](https://github.com/ClickHouse/ClickHouse/pull/1702) +* Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. 
[#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) +* Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [#3341](https://github.com/ClickHouse/ClickHouse/pull/3341) +* If after merging data parts, the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [#3194](https://github.com/ClickHouse/ClickHouse/pull/3194) +* Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344) +* Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...` This error occurred in version 18.12.13. [#3247](https://github.com/ClickHouse/ClickHouse/pull/3247) +* Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the query if the `JOIN` is only performed on remote servers. [#3340](https://github.com/ClickHouse/ClickHouse/pull/3340) +* Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). +* For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150) +* If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses to connect another server. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9) +* If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29) +* Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [#3229](https://github.com/ClickHouse/ClickHouse/pull/3229) +* Corrected type conversion between `Decimal` and integer numbers. [#3211](https://github.com/ClickHouse/ClickHouse/pull/3211) +* Fixed errors in the `enable_optimize_predicate_expression` setting. 
[Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3231) +* Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;` [#3155](https://github.com/ClickHouse/ClickHouse/pull/3155) +* Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163) +* Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [#3165](https://github.com/ClickHouse/ClickHouse/pull/3165) +* Bug fixes in the `ALTER UPDATE` query. +* Fixed bugs in the `odbc` table function that appeared in version 18.12. [#3197](https://github.com/ClickHouse/ClickHouse/pull/3197) +* Fixed the operation of aggregate functions with `StateArray` combinators. [#3188](https://github.com/ClickHouse/ClickHouse/pull/3188) +* Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179) +* Fixed output of types for operations using `Decimal` and integer arguments. [#3224](https://github.com/ClickHouse/ClickHouse/pull/3224) +* Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a) +* The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [#3241](https://github.com/ClickHouse/ClickHouse/pull/3241) +* Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664) +* Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [#3254](https://github.com/ClickHouse/ClickHouse/pull/3254) +* Fixed handling of substitutions in `clickhouse-performance-test`, if the query contains only part of the substitutions declared in the test. [#3263](https://github.com/ClickHouse/ClickHouse/pull/3263) +* Fixed an error when using `FINAL` with `PREWHERE`. [#3298](https://github.com/ClickHouse/ClickHouse/pull/3298) +* Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [#3298](https://github.com/ClickHouse/ClickHouse/pull/3298) +* Added a check for the absence of `arrayJoin` for `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [#3337](https://github.com/ClickHouse/ClickHouse/pull/3337) +* Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [#3357](https://github.com/ClickHouse/ClickHouse/pull/3357) +* Fixed segfault that could occur in rare cases after optimization that replaced AND chains from equality evaluations with the corresponding IN expression. 
[liuyimin-bytedance](https://github.com/ClickHouse/ClickHouse/pull/3339) +* Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [#3352](https://github.com/ClickHouse/ClickHouse/pull/3352) + +#### Backward incompatible changes: + +* Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for default use. [#3329](https://github.com/ClickHouse/ClickHouse/pull/3329) + +## ClickHouse release 18.12 + +### ClickHouse release 18.12.17, 2018-09-16 + +#### New features: + +* `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [#3126](https://github.com/ClickHouse/ClickHouse/pull/3126) +* Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/ClickHouse/ClickHouse/pull/3123) +* The `Decimal` type now supports `var*` and `stddev*` aggregate functions. [#3129](https://github.com/ClickHouse/ClickHouse/pull/3129) +* The `Decimal` type now supports mathematical functions (`exp`, `sin` and so on.) [#3129](https://github.com/ClickHouse/ClickHouse/pull/3129) +* The `system.part_log` table now has the `partition_id` column. [#3089](https://github.com/ClickHouse/ClickHouse/pull/3089) + +#### Bug fixes: + +* `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3159) +* Fixed incompatibility (unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [#3130](https://github.com/ClickHouse/ClickHouse/pull/3130) +* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107) +* Fixed a minor issue with backwards compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [#3122](https://github.com/ClickHouse/ClickHouse/pull/3122) + +#### Backward incompatible changes: + +* The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107) + +### ClickHouse release 18.12.14, 2018-09-13 + +#### New features: + +* Added support for `ALTER UPDATE` queries. [#3035](https://github.com/ClickHouse/ClickHouse/pull/3035) +* Added the `allow_ddl` option, which restricts the user's access to DDL queries. [#3104](https://github.com/ClickHouse/ClickHouse/pull/3104) +* Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using O_DIRECT). 
[#3117](https://github.com/ClickHouse/ClickHouse/pull/3117) +* The `system.merges` system table now contains the `partition_id` column. [#3099](https://github.com/ClickHouse/ClickHouse/pull/3099) + +#### Improvements + +* If a data part remains unchanged during mutation, it isn't downloaded by replicas. [#3103](https://github.com/ClickHouse/ClickHouse/pull/3103) +* Autocomplete is available for names of settings when working with `clickhouse-client`. [#3106](https://github.com/ClickHouse/ClickHouse/pull/3106) + +#### Bug fixes: + +* Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [#3118](https://github.com/ClickHouse/ClickHouse/pull/3118) +* Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13. +* Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3098) +* Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3097) + + +### ClickHouse release 18.12.13, 2018-09-10 + +#### New features: + +* Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type`. [#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [#3047](https://github.com/ClickHouse/ClickHouse/pull/3047) +* New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`). [#2948](https://github.com/ClickHouse/ClickHouse/pull/2948) +* In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2787) +* Added support for JOIN with table functions. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907) +* Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/ClickHouse/ClickHouse/pull/2447) +* Ctrl+C in clickhouse-client clears a query that was entered. [#2877](https://github.com/ClickHouse/ClickHouse/pull/2877) +* Added the `join_default_strictness` setting (values: `"`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [#2982](https://github.com/ClickHouse/ClickHouse/pull/2982) +* Each line of the server log related to query processing shows the query ID. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). 
[#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* Added `ProfileEvents`counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +* The `system.metrics` and `system.events` tables now have built-in documentation. [#3016](https://github.com/ClickHouse/ClickHouse/pull/3016) +* Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2975) +* Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942) +* Added the `retention` aggregate function. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2887) +* Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [#3034](https://github.com/ClickHouse/ClickHouse/pull/3034) +* Tables in the MergeTree family now have the virtual column `_partition_id`. [#3089](https://github.com/ClickHouse/ClickHouse/pull/3089) + +#### Experimental features: + +* Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary. [#2830](https://github.com/ClickHouse/ClickHouse/pull/2830) +* Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [#3077](https://github.com/ClickHouse/ClickHouse/pull/3077) + +#### Improvements: + +* Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag. +* Improved performance of `GROUP BY` with multiple aggregation fields when one of them is string and the others are fixed length. +* Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`. +* Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2977) [#2980](https://github.com/ClickHouse/ClickHouse/pull/2980) +* Improved performance of reading strings and arrays in binary formats. 
[Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2955) +* Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [#2953](https://github.com/ClickHouse/ClickHouse/pull/2953) +* Fixed a performance problem in the case of a large stream of queries that result in an error (the ` _dl_addr` function is visible in `perf top`, but the server isn't using much CPU). [#2938](https://github.com/ClickHouse/ClickHouse/pull/2938) +* Conditions are cast into the View (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907) +* Improvements to the functionality for the `UUID` data type. [#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [#2985](https://github.com/ClickHouse/ClickHouse/pull/2985) +* The `UUID` data type is supported in The-Alchemist dictionaries. [#2822](https://github.com/ClickHouse/ClickHouse/pull/2822) +* The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2974) +* When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958) +* For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [#2920](https://github.com/ClickHouse/ClickHouse/pull/2920) +* The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971) +* You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m`. [#2840](https://github.com/ClickHouse/ClickHouse/pull/2840) +* You can use the `SELECT TOP n` syntax as an alternative for `LIMIT`. [#2840](https://github.com/ClickHouse/ClickHouse/pull/2840) +* Increased the size of the queue to write to system tables, so the `SystemLog parameter queue is full` error doesn't happen as often. +* The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2801) +* Duplicate columns can be used in a `USING` clause for `JOIN`. [#3006](https://github.com/ClickHouse/ClickHouse/pull/3006) +* `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [#3003](https://github.com/ClickHouse/ClickHouse/pull/3003) +* The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2885) +* Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/ClickHouse/ClickHouse/pull/2909) +* The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`. +* `ALTER DELETE` queries work for materialized views. +* Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there are a very large number of `ReplicatedMergeTree` tables. +* Support for `ATTACH TABLE ... ON CLUSTER` queries. 
[#3025](https://github.com/ClickHouse/ClickHouse/pull/3025) + +#### Bug fixes: + +* Fixed an issue with `Dictionary` tables (throws the `Size of offsets doesn't match size of column` or `Unknown compression method` exception). This bug appeared in version 18.10.3. [#2913](https://github.com/ClickHouse/ClickHouse/issues/2913) +* Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [#3049](https://github.com/ClickHouse/ClickHouse/pull/3049) +* Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [#3038](https://github.com/ClickHouse/ClickHouse/pull/3038) +* Fixed the possibility of data loss when inserting in `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [#2964](https://github.com/ClickHouse/ClickHouse/pull/2964) +* Fixed a segfault during `JOIN ... ON`. [#3000](https://github.com/ClickHouse/ClickHouse/pull/3000) +* Fixed the error searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [#2994](https://github.com/ClickHouse/ClickHouse/pull/2994) +* Fixed the "Not found column" error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [#3087](https://github.com/ClickHouse/ClickHouse/pull/3087) +* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [#3093](https://github.com/ClickHouse/ClickHouse/pull/3093) +* Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [#2910](https://github.com/ClickHouse/ClickHouse/pull/2910) +* Fixed the incorrect result when comparing `nan` with integers. [#3024](https://github.com/ClickHouse/ClickHouse/pull/3024) +* Fixed an error in the `zlib-ng` library that could lead to segfault in rare cases. [#2854](https://github.com/ClickHouse/ClickHouse/pull/2854) +* Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [#3084](https://github.com/ClickHouse/ClickHouse/pull/3084) +* Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously. +* Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [#2989](https://github.com/ClickHouse/ClickHouse/pull/2989) +* Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. 
[Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2960) +* Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [#2937](https://github.com/ClickHouse/ClickHouse/pull/2937) +* The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2929) +* Fixed the segfault when re-initializing the ZooKeeper session. [#2917](https://github.com/ClickHouse/ClickHouse/pull/2917) +* Fixed potential blocking when working with ZooKeeper. +* Fixed incorrect code for adding nested data structures in a `SummingMergeTree`. +* When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2808) + +#### Security fix: + +* Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. [#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [#2921](https://github.com/ClickHouse/ClickHouse/pull/2921) +* Fixed incorrect validation of the file path in the `catBoostPool` table function. [#2894](https://github.com/ClickHouse/ClickHouse/pull/2894) +* The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user's configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2856) + +#### Backward incompatible changes: + +* In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. + +#### Build changes: + +* Most integration tests can now be run by commit. +* Code style checks can also be run by commit. +* The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912) +* When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall-Wextra -Werror`. [#2957](https://github.com/ClickHouse/ClickHouse/pull/2957) +* Debugging the build uses the `jemalloc` debug option. +* The interface of the library for interacting with ZooKeeper is declared abstract. [#2950](https://github.com/ClickHouse/ClickHouse/pull/2950) + +## ClickHouse release 18.10 + +### ClickHouse release 18.10.3, 2018-08-13 + +#### New features: + +* HTTPS can be used for replication. [#2760](https://github.com/ClickHouse/ClickHouse/pull/2760) +* Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32`. [#2791](https://github.com/ClickHouse/ClickHouse/pull/2791) +* Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [#2834](https://github.com/ClickHouse/ClickHouse/pull/2834) +* Support for `UUID` in the key columns. 
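+
+As a quick illustration of the `murmurHash*` functions added above, they are called like any other scalar function (a sketch; the literal string argument and the aliases are arbitrary):
+
+```sql
+SELECT
+    murmurHash2_64('ClickHouse') AS h2_64,
+    murmurHash3_32('ClickHouse') AS h3_32,
+    murmurHash3_64('ClickHouse') AS h3_64,
+    hex(murmurHash3_128('ClickHouse')) AS h3_128;
+```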
+ +#### Improvements: + +* Clusters can be removed without restarting the server when they are deleted from the config files. [#2777](https://github.com/ClickHouse/ClickHouse/pull/2777) +* External dictionaries can be removed without restarting the server when they are removed from config files. [#2779](https://github.com/ClickHouse/ClickHouse/pull/2779) +* Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781) +* Improvements for the `UUID` data type (not yet complete). [#2618](https://github.com/ClickHouse/ClickHouse/pull/2618) +* Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [#2815](https://github.com/ClickHouse/ClickHouse/pull/2815) +* Old records of completed mutations are deleted (`ALTER DELETE`). [#2784](https://github.com/ClickHouse/ClickHouse/pull/2784) +* Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841) +* The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851) +* Added the `max_partition_size_to_drop` config option. [#2782](https://github.com/ClickHouse/ClickHouse/pull/2782) +* Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812) +* Added the `max_fetch_partition_retries_count` setting. [#2831](https://github.com/ClickHouse/ClickHouse/pull/2831) +* Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [#2832](https://github.com/ClickHouse/ClickHouse/pull/2832) +* The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855) + +#### Bug fixes: + +* Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0. +* Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [#2814](https://github.com/ClickHouse/ClickHouse/pull/2814) +* Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [#2930](https://github.com/ClickHouse/ClickHouse/pull/2930) +* Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823) +* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a sub-query if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094) +* Fixed a memory leak if an exception occurred when connecting to a MySQL server. +* Fixed incorrect clickhouse-client response code in case of a query error. +* Fixed incorrect behavior of materialized views containing DISTINCT. [#2795](https://github.com/ClickHouse/ClickHouse/issues/2795) + +#### Backward incompatible changes + +* Removed support for CHECK TABLE queries for Distributed tables. 
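+
+For reference, a minimal sketch of the `SETTINGS` syntax for the `Kafka` engine mentioned in the improvements above (the broker address, topic, consumer group, and column layout are placeholders):
+
+```sql
+CREATE TABLE kafka_queue
+(
+    ts DateTime,
+    message String
+)
+ENGINE = Kafka
+SETTINGS
+    kafka_broker_list = 'localhost:9092',
+    kafka_topic_list = 'events',
+    kafka_group_name = 'clickhouse_consumer',
+    kafka_format = 'JSONEachRow';
+```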
+ +#### Build changes: + +* The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [#2773](https://github.com/ClickHouse/ClickHouse/pull/2773) +* Use of libressl from a submodule. [#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [#2807](https://github.com/ClickHouse/ClickHouse/pull/2807) +* Use of unixodbc from a submodule. [#2789](https://github.com/ClickHouse/ClickHouse/pull/2789) +* Use of mariadb-connector-c from a submodule. [#2785](https://github.com/ClickHouse/ClickHouse/pull/2785) +* Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself). + +## ClickHouse release 18.6 + +### ClickHouse release 18.6.0, 2018-08-02 + +#### New features: + +* Added support for ON expressions for the JOIN ON syntax: +`JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]` +The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [#2742](https://github.com/ClickHouse/ClickHouse/pull/2742) +* HTTPS can be enabled for replication. [#2760](https://github.com/ClickHouse/ClickHouse/pull/2760) + +#### Improvements: + +* The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [#2646](https://github.com/ClickHouse/ClickHouse/pull/2646) + +## ClickHouse release 18.5 + +### ClickHouse release 18.5.1, 2018-07-31 + +#### New features: + +* Added the hash function `murmurHash2_32` [#2756](https://github.com/ClickHouse/ClickHouse/pull/2756). + +#### Improvements: + +* Now you can use the `from_env` [#2741](https://github.com/ClickHouse/ClickHouse/pull/2741) attribute to set values in config files from environment variables. +* Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions [#2752](https://github.com/ClickHouse/ClickHouse/pull/2752). + +#### Bug fixes: + +* Fixed a possible bug when starting a replica [#2759](https://github.com/ClickHouse/ClickHouse/pull/2759). + +## ClickHouse release 18.4 + +### ClickHouse release 18.4.0, 2018-07-28 + +#### New features: + +* Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations` [#2721](https://github.com/ClickHouse/ClickHouse/pull/2721). +* Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function [#2708](https://github.com/ClickHouse/ClickHouse/pull/2708). +* Support for `HTTP Basic` authentication in the replication protocol [#2727](https://github.com/ClickHouse/ClickHouse/pull/2727). +* The `has` function now allows searching for a numeric value in an array of `Enum` values [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699).
+* Support for adding arbitrary message separators when reading from `Kafka` [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701). + +#### Improvements: + +* The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition [#2694](https://github.com/ClickHouse/ClickHouse/pull/2694). +* The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed. +* Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER` [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689). + +#### Bug fixes: + +* Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2)) +* Fixed a bug in the `windowFunnel` aggregate function [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735). +* Fixed a bug in the `anyHeavy` aggregate function ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee)). +* Fixed server crash when using the `countArray()` aggregate function. + +#### Backward incompatible changes: + +* Parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use the `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add the `kafka_row_delimiter` parameter with the `''` value. + +## ClickHouse release 18.1 + +### ClickHouse release 18.1.0, 2018-07-23 + +#### New features: + +* Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)). +* Support for arbitrary types for the `uniq*` family of aggregate functions ([#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)). +* Support for arbitrary types in comparison operators ([#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)). +* The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)). +* Added the `arrayDistinct` function ([#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)). +* The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)). + +#### Improvements: + +* Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow timezone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog. +* Faster conversions of floating-point numbers to a string ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)).
+* If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)). + +#### Bug fixes: + +* Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)). +* Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)). +* Fixed an error during a CAST to Nullable types ([#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)). +* Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)). +* Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)). +* Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)). +* Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't in uppercase letters ([fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)). +* Added missing quoting of identifiers for queries to an external DBMS ([#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)). + +#### Backward incompatible changes: + +* Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`. + +## ClickHouse release 1.1 + +### ClickHouse release 1.1.54394, 2018-07-12 + +#### New features: + +* Added the `histogram` aggregate function ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)). +* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)). + +#### Bug fixes: + +* Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388. +* Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table. +* The `has` function now works correctly for an array with Nullable elements ([#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)). +* The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table. +* Fixed how an empty `TinyLog` table works after inserting an empty data block ([#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)). +* The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL. 
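+
+As an aside, a minimal sketch of the `histogram` aggregate function added in this release, assuming the parametric call form `histogram(bins)(column)`; the bin count and the `numbers(100)` input are arbitrary:
+
+```sql
+SELECT histogram(5)(number) FROM numbers(100);
+```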
+ +### ClickHouse release 1.1.54390, 2018-07-06 + +#### New features: + +* Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Hvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)). +* Added the ability to enable or disable processing single or double quotes when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)). +* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)). + +#### Improvements: + +* Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used ([#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)). +* Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2. +* Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)). +* Added `Nullable` support for the `runningDifference` function ([#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)). +* Improved query analysis performance when there is a very large number of expressions ([#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)). +* Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)). +* The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)). + +#### Bug fixes: + +* Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and readonly states of tables before restarting the server. +* Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted. +* Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)). +* Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)). +* Fixed segfault if `macros` are used but they aren't in the config file ([#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)). +* Fixed switching to the default database when reconnecting the client ([#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)). +* Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled. + +#### Security fix: + +* Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`). + +### ClickHouse release 1.1.54388, 2018-06-28 + +#### New features: + +* Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track progress of this type of queries. 
+* Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables. +* Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260)) +* Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`). +* Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)). +* Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)). +* Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)). +* New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)). +* The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)). +* The password to `clickhouse-client` can be entered interactively. +* Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)). +* Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)). +* Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263)) +* Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats. +* Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests. + +#### Experimental features: + +* Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272)) +* JIT compilation to native code is now available for some expressions ([pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)). + +#### Bug fixes: + +* Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`. +* Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result. +* Fixed an error when reading an array column from a Nested structure ([#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)). +* Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`. +* Fixed an error when analyzing queries with recursive aliases. +* Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)). +* User profile settings were not applied when using sessions in the HTTP interface. +* Fixed how settings are applied from the command line parameters in clickhouse-local. +* The ZooKeeper client library now uses the session timeout received from the server. +* Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout. +* Fixed pruning of parts for queries with conditions on partition key columns ([#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)). +* Merges are now possible after `CLEAR COLUMN IN PARTITION` ([#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)). +* Type mapping in the ODBC table function has been fixed ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)). 
+* Type comparisons have been fixed for `DateTime` with and without the time zone ([Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)). +* Fixed syntactic parsing and formatting of the `CAST` operator. +* Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)). +* Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)). +* Fixed SSRF in the remote() table function. +* Fixed exit behavior of `clickhouse-client` in multiline mode ([#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)). + +#### Improvements: + +* Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)). +* Improved LZ4 compression performance. +* Faster analysis for queries with a large number of JOINs and sub-queries. +* The DNS cache is now updated automatically when there are too many network errors. +* Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts. +* Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`. +* Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match. +* A server with replicated tables can start even if you haven't configured ZooKeeper. +* When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)). +* Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)). + +#### Build changes: + +* The gcc8 compiler can be used for builds. +* Added the ability to build llvm from submodule. +* The version of the librdkafka library has been updated to v0.11.4. +* Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0. +* Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)). +* Cmake now generates files for ninja by default (like when using `-G Ninja`). +* Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)). +* Fixed a header file conflict in Fedora Rawhide ([#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)). + +#### Backward incompatible changes: + +* Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format. +* If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: Update the server on the entire cluster. + +### ClickHouse release 1.1.54385, 2018-06-01 + +#### Bug fixes: + +* Fixed an error that in some cases caused ZooKeeper operations to block. + +### ClickHouse release 1.1.54383, 2018-05-22 + +#### Bug fixes: + +* Fixed a slowdown of replication queue if a table has many replicas. + +### ClickHouse release 1.1.54381, 2018-05-14 + +#### Bug fixes: + +* Fixed a nodes leak in ZooKeeper when ClickHouse loses connection to ZooKeeper server. 
+ +### ClickHouse release 1.1.54380, 2018-04-21 + +#### New features: + +* Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` and then `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`. + +#### Improvements: + +* Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`. +* Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit. + +#### Bug fixes: + +* Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`. +* Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`. +* Fixed inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table. +* Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica. +* Fixed freezing of `KILL QUERY`. +* Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration. + +#### Backward incompatible changes: + +* Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors. + +### ClickHouse release 1.1.54378, 2018-04-16 + +#### New features: + +* Logging level can be changed without restarting the server. +* Added the `SHOW CREATE DATABASE` query. +* The `query_id` can be passed to `clickhouse-client` (elBroom). +* New setting: `max_network_bandwidth_for_all_users`. +* Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`. +* Added information about the size of data parts in uncompressed form in the system table. +* Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`). +* Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in Zookeeper: `use_minimalistic_checksums_in_zookeeper = 1`. +* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed. It's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). +* Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). +* When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result. + +#### Improvements: + +* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue. +* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. +* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). +* The `lengthUTF8` function runs faster (zhang2014). +* Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards.
+* The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). +* More robust crash recovery for asynchronous insertion into `Distributed` tables. +* The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊). + +#### Bug fixes: + +* Fixed an error with `IN` when the left side of the expression is `Nullable`. +* Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index. +* The `max_execution_time` limit now works correctly with distributed queries. +* Fixed errors when calculating the size of composite columns in the `system.columns` table. +* Fixed an error when creating a temporary table with `CREATE TEMPORARY TABLE IF NOT EXISTS`. +* Fixed errors in `StorageKafka` (#2075). +* Fixed server crashes from invalid arguments of certain aggregate functions. +* Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. +* `Too many parts` state is less likely to happen when inserting into aggregated materialized views (#2084). +* Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. +* Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. +* `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. +* Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables. + +#### Build changes: + +* The build supports `ninja` instead of `make` and uses `ninja` by default for building releases. +* Renamed packages: `clickhouse-server-base` to `clickhouse-common-static`; `clickhouse-server-common` to `clickhouse-server`; `clickhouse-common-dbg` to `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. + +#### Backward incompatible changes: + +* Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`. +* Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config. + +### ClickHouse release 1.1.54370, 2018-03-16 + +#### New features: + +* Added the `system.macros` table and auto updating of macros when the config file is changed. +* Added the `SYSTEM RELOAD CONFIG` query. +* Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval.
([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)). + +#### Improvements: + +* When inserting data in a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log). +* Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`. + +#### Bug fixes: + +* Fixed the `Illegal PREWHERE` error when reading from Merge tables for `Distributed` tables. +* Added fixes that allow you to start clickhouse-server in IPv4-only Docker containers. +* Fixed a race condition when reading from the `system.parts_columns` system table. +* Removed double buffering during a synchronous insert to a `Distributed` table, which could have caused the connection to time out. +* Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query. +* Fixed incorrect dates in the `system.parts` table. +* Fixed a bug that made it impossible to insert data in a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster. +* Fixed the vertical merging algorithm for an empty `ORDER BY` table. +* Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server. This functionality was lost in release 1.1.54362. +* Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358. +* Removed extraneous error-level logging of `Not found column ... in block`. + +### Clickhouse Release 1.1.54362, 2018-03-11 + +#### New features: + +* Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1. +* Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard. +* Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`. +* An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova). +* Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta). +* Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings. +* Added functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`. +* Added the `arrayCumSum` function (Javi Santana). +* Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read the DateTime from a string containing text in a wide variety of possible formats. +* Data can be partially reloaded from external dictionaries during updating (load just the records in which the value of the specified field is greater than in the previous download) (Arsen Hakobyan).
+* Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier. +* The `remote` and `cluster` table functions can be used in `INSERT` queries. +* Added the `create_table_query` and `engine_full` virtual columns to the `system.tables` table. The `metadata_modification_time` column is virtual. +* Added the `data_path` and `metadata_path` columns to the `system.tables` and `system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables. +* Added additional information about merges in the `system.part_log` table. +* An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov). +* The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014). +* Added `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014). +* Support for `SHOW CREATE TABLE` for temporary tables (zhang2014). +* Added the `system_profile` configuration parameter for the settings used by internal processes. +* Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko). +* Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko). +* Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes. +* Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table. +* Configuration settings can be overridden in the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`. +* Implemented the `empty` function from a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014). +* Added the `listen_try` configuration parameter for listening to at least one of the listen addresses without quitting, if some of the addresses can't be listened to (useful for systems with disabled support for IPv4 or IPv6). +* Added the `VersionedCollapsingMergeTree` table engine. +* Support for rows and arbitrary numeric types for the `library` dictionary source. +* `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`). +* A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`. +* `RENAME TABLE` can be performed for `VIEW`. +* Added the `throwIf` function. +* Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024). +* The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns. + +#### Improvements: + +* Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries. +* Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts. +* Added the `allow_distributed_ddl` option. +* Nondeterministic functions are not allowed in expressions for `MergeTree` table keys. +* Files with substitutions from `config.d` directories are loaded in alphabetical order. +* Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`.
+* The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks). +* When running a query, table valued functions run once. Previously, `remote` and `mysql` table valued functions performed the same query twice to retrieve the table structure from a remote server. +* The `MkDocs` documentation generator is used. +* When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014). +* Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342. +* `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, it uses the corresponding numeric values. This feature was previously available but was lost in the release 1.1.54337. +* Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases. + +#### Bug fixes: + +* Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`. +* Fixed a bug in merges for `ReplacingMergeTree` tables. +* Fixed synchronous insertions in `Distributed` tables (`insert_distributed_sync = 1`). +* Fixed segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries. +* Fixed segfault for certain uses of `replace_running_query` and `KILL QUERY`. +* Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table. +* Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata. +* Fixed the `DROP DATABASE` query for `Dictionary` databases. +* Fixed the low precision of `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alex Bocharov). +* Fixed the calculation of implicit default values when necessary to simultaneously calculate default explicit expressions in `INSERT` queries (zhang2014). +* Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc). +* Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc). +* Fixed a slight performance regression with functions that use regular expressions. +* Fixed a performance regression when creating multidimensional arrays from complex expressions. +* Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata. +* Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table. +* Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand). +* Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`. +* Fixed a bug when filtering tables by virtual `_table` columns in queries to `Merge` tables. +* Fixed a bug when using `ALIAS` columns in `Distributed` tables. +* Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family. +* Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries. 
+* Fixed a crash when passing arrays of different sizes to an `arrayReduce` function when using aggregate functions from multiple arguments. +* Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`. +* Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled). + +#### Backward incompatible changes: + +* Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default. +* Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`. +* Removed the `UnsortedMergeTree` engine. + +### Clickhouse Release 1.1.54343, 2018-02-05 + +* Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`. +* Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index. +* Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue. + +### Clickhouse Release 1.1.54342, 2018-01-22 + +This release contains bug fixes for the previous release 1.1.54337: + +* Fixed a regression in 1.1.54337: if the default user has readonly access, then the server refuses to start up with the message `Cannot create database in readonly mode`. +* Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d. +* Fixed a regression in 1.1.54337: wrong default configuration in the Docker image. +* Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`). +* Fixed a bug that may lead to inconsistent merges after OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`). +* Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014). +* Fixed a bug in implementation of NULL. + +### Clickhouse Release 1.1.54337, 2018-01-18 + +#### New features: + +* Added support for storage of multi-dimensional arrays and tuples (`Tuple` data type) in tables. +* Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`. +* Improved support for time zones. The `DateTime` data type can be annotated with the timezone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When timezones are specified in functions for `DateTime` arguments, the return type will track the timezone, and the value will be displayed as expected. +* Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive. +* Added the `toStartOfFifteenMinutes` function (Kirill Shvakov). +* Added the `clickhouse format` tool for formatting queries. +* Added the `format_schema_path` configuration parameter (Marek Vavruşa). It is used for specifying a schema in `Cap'n Proto` format. 
Schema files can be located only in the specified directory.
+* Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin).
+* Added a column with documentation for the `system.settings` table (Kirill Shvakov).
+* Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables.
+* Added the `system.models` table with information about loaded `CatBoost` machine learning models.
+* Added the `mysql` and `odbc` table functions and corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage.
+* Added the possibility to pass an argument of type `AggregateFunction` for the `groupArray` aggregate function (so you can create an array of states of some aggregate function).
+* Removed restrictions on various combinations of aggregate function combinators. For example, you can use `avgForEachIf` as well as `avgIfForEach` aggregate functions, which have different behaviors.
+* The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments.
+* Added support for aggregate functions of `Nullable` arguments even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Example: `groupArray`, `groupUniqArray`, `topK`.
+* Added the `max_client_network_bandwidth` setting for `clickhouse-client` (Kirill Shvakov).
+* Users with the `readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT...) (Kirill Shvakov).
+* Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša).
+* Added the `intExp3` and `intExp4` functions.
+* Added the `sumKahan` aggregate function.
+* Added the to*Number*OrNull functions, where *Number* is a numeric type.
+* Added support for `WITH` clauses for an `INSERT SELECT` query (author: zhang2014).
+* Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded.
+* Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova).
+* The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory.
+* Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info in stderr.
+* Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird).
+
+#### Performance optimizations:
+
+* Improved performance of aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` for string arguments.
+* Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`.
+* Improved performance of parsing and formatting `Date` and `DateTime` type values in text format.
+* Improved performance and precision of parsing floating point numbers.
+* Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`.
+* Improved performance of aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr` by reducing computational stability.
The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`.
+
+#### Bug fixes:
+
+* Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again was not working because inserted blocks were considered duplicates.
+* Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`.
+* Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration.
+* Fixed unexpected results of passing the `Date` argument to `toStartOfDay`.
+* Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year.
+* Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete.
+* Fixed `SummingMergeTree` behavior in cases when the rows summed to zero.
+* Various fixes for the `Kafka` engine (Marek Vavruša).
+* Fixed incorrect behavior of the `Join` table engine (Amos Bird).
+* Fixed incorrect allocator behavior under FreeBSD and OS X.
+* The `extractAll` function now supports empty matches.
+* Fixed an error that blocked usage of `libressl` instead of `openssl`.
+* Fixed the `CREATE TABLE AS SELECT` query from temporary tables.
+* Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts.
+* Fixed possible overflow in `gcd`, `lcm` and `modulo` (`%` operator) (Maks Skorokhod).
+* `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config).
+* Fixed a bug in the background check of parts (`MergeTreePartChecker`) when using a custom partition key.
+* Fixed parsing of tuples (values of the `Tuple` data type) in text formats.
+* Improved error messages about incompatible types passed to `multiIf`, `array` and some other functions.
+* Redesigned support for `Nullable` types. Fixed bugs that may lead to a server crash. Fixed almost all other bugs related to `NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, `join_use_nulls` mode, Nullable types as arguments of `OR` operator, etc.
+* Fixed various bugs related to internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree`; alignment of `Enum` types in `Pretty` formats, etc.
+* Stricter checks for allowed combinations of composite columns.
+* Fixed the overflow when specifying a very large parameter for the `FixedString` data type.
+* Fixed a bug in the `topK` aggregate function in a generic case.
+* Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with an `-Array` combinator.
+* Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322).
+* Fixed the precision of the `exp10` function.
+* Fixed the behavior of the `visitParamExtract` function for better compliance with documentation.
+* Fixed the crash when incorrect data types are specified.
+* Fixed the behavior of `DISTINCT` in the case when all columns are constants.
+* Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index.
+* Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries.
+* Fixed a bug that leads to excessive rows in the result of `FULL` and `RIGHT JOIN` (Amos Bird).
+* Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload.
+* Fixed the `SYSTEM DROP DNS CACHE` query: the cache was flushed but addresses of cluster nodes were not updated.
+* Fixed the behavior of `MATERIALIZED VIEW` after executing `DETACH TABLE` for the table under the view (Marek Vavruša).
+
+#### Build improvements:
+
+* The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment.
+* A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems.
+* Added the `clickhouse-test` package. It can be used to run functional tests.
+* The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub.
+* Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run.
+* Added support for `Cap'n Proto` in the default build.
+* Changed the format of documentation sources from `Restricted Text` to `Markdown`.
+* Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually.
+* For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as `clickhouse clang` and `clickhouse lld`.
+* Removed usage of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang` the default is `libc++` instead of `libstdc++`.
+* Extracted `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools.
+
+#### Backward incompatible changes:
+
+* The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything.
+* Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default.
+* The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstvalue` to avoid confusion.
+* Removed the `FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird).
+* Removed the `BlockTabSeparated` format that was used solely for demonstration purposes.
+* Changed the state format for aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com.
+* In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you can still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost.
Although it was undocumented, we plan to support it again in future releases. +* Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release. + +#### Please note when upgrading: + +* When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message ` unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated. +* If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes. From 28abe73257a5fd05a3baee09075cd6824112db16 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Mar 2020 20:23:29 +0300 Subject: [PATCH 036/115] Create CHANGELOG_2017.md --- CHANGELOG_2017.md | 262 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 CHANGELOG_2017.md diff --git a/CHANGELOG_2017.md b/CHANGELOG_2017.md new file mode 100644 index 00000000000..e6d6d6bec10 --- /dev/null +++ b/CHANGELOG_2017.md @@ -0,0 +1,262 @@ + +### ClickHouse release 1.1.54327, 2017-12-21 + +This release contains bug fixes for the previous release 1.1.54318: + +* Fixed bug with possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in logs in Warning messages like ` Part ... from own log doesn't exist.` The issue is relevant even if you don't see these messages in logs. + +### ClickHouse release 1.1.54318, 2017-11-30 + +This release contains bug fixes for the previous release 1.1.54310: + +* Fixed incorrect row deletions during merges in the SummingMergeTree engine +* Fixed a memory leak in unreplicated MergeTree engines +* Fixed performance degradation with frequent inserts in MergeTree engines +* Fixed an issue that was causing the replication queue to stop running +* Fixed rotation and archiving of server logs + +### ClickHouse release 1.1.54310, 2017-11-01 + +#### New features: + +* Custom partitioning key for the MergeTree family of table engines. +* [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine. +* Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. +* Added support for time zones with non-integer offsets from UTC. +* Added support for arithmetic operations with time intervals. +* The range of values for the Date and DateTime types is extended to the year 2105. +* Added the ` CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view). +* Added the `ATTACH TABLE` query without arguments. +* The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted to the sumMap aggregate function. You can now specify such columns explicitly. +* Max size of the IP trie dictionary is increased to 128M entries. +* Added the getSizeOfEnumType function. +* Added the sumWithOverflow aggregate function. +* Added support for the Cap'n Proto input format. +* You can now customize compression level when using the zstd algorithm. 
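+
+A minimal sketch of two of the features listed above (the custom partitioning key for `MergeTree` and `CREATE MATERIALIZED VIEW ... TO`); the table, view, and column names here are only illustrative and are not taken from the release notes:
+
+```sql
+-- Custom partitioning key: partition a MergeTree table by month (illustrative names).
+CREATE TABLE hits
+(
+    event_date Date,
+    user_id UInt64,
+    url String
+)
+ENGINE = MergeTree()
+PARTITION BY toYYYYMM(event_date)
+ORDER BY (event_date, user_id);
+
+-- CREATE MATERIALIZED VIEW ... TO: store the view's data in an existing table
+-- (hits_daily is assumed to already exist).
+CREATE MATERIALIZED VIEW hits_daily_mv TO hits_daily
+AS SELECT event_date, count() AS hits FROM hits GROUP BY event_date;
+```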
+
+#### Backward incompatible changes:
+
+* Creation of temporary tables with an engine other than Memory is not allowed.
+* Explicit creation of tables with the View or MaterializedView engine is not allowed.
+* During table creation, a new check verifies that the sampling key expression is included in the primary key.
+
+#### Bug fixes:
+
+* Fixed hangups when synchronously inserting into a Distributed table.
+* Fixed nonatomic adding and removing of parts in Replicated tables.
+* Data inserted into a materialized view is not subjected to unnecessary deduplication.
+* Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore.
+* Users don't need access permissions to the `default` database to create temporary tables anymore.
+* Fixed crashing when specifying the Array type without arguments.
+* Fixed hangups when the disk volume containing server logs is full.
+* Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch.
+
+#### Build improvements:
+
+* Several third-party libraries (notably Poco) were updated and converted to git submodules.
+
+### ClickHouse release 1.1.54304, 2017-10-19
+
+#### New features:
+
+* TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml`).
+
+#### Bug fixes:
+
+* `ALTER` for replicated tables now tries to start running as soon as possible.
+* Fixed crashing when reading data with the setting `preferred_block_size_bytes=0`.
+* Fixed crashes of `clickhouse-client` when pressing `Page Down`.
+* Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL`.
+* `FREEZE PARTITION` always works atomically now.
+* Empty POST requests now return a response with code 411.
+* Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8))`.
+* Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables.
+* Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b`.
+* Users are updated correctly even when `users.xml` is invalid.
+* Correct handling when an executable dictionary returns a non-zero response code.
+
+### ClickHouse release 1.1.54292, 2017-09-20
+
+#### New features:
+
+* Added the `pointInPolygon` function for working with coordinates on a coordinate plane.
+* Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`.
+* Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers.
+* The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting `compile = 1`, which is not used by default).
+* Reduced the time needed for dynamic compilation of queries.
+
+#### Bug fixes:
+
+* Fixed an error that sometimes produced `part ... intersects previous part` messages and weakened replica consistency.
+* Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown.
+* Removed excessive logging when restoring replicas.
+* Fixed an error in the UNION ALL implementation.
+* Fixed an error in the concat function that occurred if the first column in a block has the Array type.
+* Progress is now displayed correctly in the system.merges table.
+
+### ClickHouse release 1.1.54289, 2017-09-13
+
+#### New features:
+
+* `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`.
+* Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`.
+* Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster.
+* Added aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`).
+* External dictionaries can be loaded from MySQL by specifying a socket in the filesystem.
+* External dictionaries can be loaded from MySQL over SSL (`ssl_cert`, `ssl_key`, `ssl_ca` parameters).
+* Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user.
+* Support for `DROP TABLE` for temporary tables.
+* Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats.
+* Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes).
+* FIFO locking is used during ALTER: an ALTER query isn't blocked indefinitely for continuously running queries.
+* Option to set `umask` in the config file.
+* Improved performance for queries with `DISTINCT`.
+
+#### Bug fixes:
+
+* Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn't get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things.
+* Fixed randomization when choosing hosts for the connection to ZooKeeper.
+* Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost.
+* Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running `ALTER MODIFY` on an element in a `Nested` structure.
+* Fixed an error that could cause SELECT queries to "hang".
+* Improvements to distributed DDL queries.
+* Fixed the query `CREATE TABLE ... AS`.
+* Resolved the deadlock in the `ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables.
+* Fixed the invalid default value for `Enum`s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats.
+* Resolved the appearance of zombie processes when using a dictionary with an `executable` source.
+* Fixed segfault for the HEAD query.
+
+#### Improved workflow for developing and assembling ClickHouse:
+
+* You can use `pbuilder` to build ClickHouse.
+* You can use `libc++` instead of `libstdc++` for builds on Linux.
+* Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`.
+
+#### Please note when upgrading:
+
+* There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts.
As a result, INSERT queries will fail with the message "Merges are processing significantly slower than inserts." Use the `SELECT * FROM system.merges` query to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don't need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the `<merge_tree>` section in config.xml, set `<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool>` and restart the server.
+
+### ClickHouse release 1.1.54284, 2017-08-29
+
+* This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper.
+
+### ClickHouse release 1.1.54282, 2017-08-23
+
+This release contains bug fixes for the previous release 1.1.54276:
+
+* Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table.
+* Fixed parsing when inserting in RowBinary format if input data starts with ';'.
+* Fixed errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
+
+### Clickhouse Release 1.1.54276, 2017-08-16
+
+#### New features:
+
+* Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`.
+* INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert_distributed_sync=1.
+* Added the UUID data type for working with 16-byte identifiers.
+* Added aliases of CHAR, FLOAT and other types for compatibility with Tableau.
+* Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers.
+* You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
+* Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len)`.
+* Added the max_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
+
+#### Main changes:
+
+* Security improvements: all server files are created with 0640 permissions (can be changed via config parameter).
+* Improved error messages for queries with invalid syntax.
+* Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
+* Significantly increased the performance of data merges for the ReplacingMergeTree engine.
+* Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed_directory_monitor_batch_inserts=1.
+
+#### Backward incompatible changes:
+
+* Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
+
+#### Complete list of changes:
+
+* Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
+* Optimized stream allocation when reading from a Distributed table.
+* Settings can be configured in readonly mode if the value doesn't change.
+* Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred_block_size_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
+* Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr`.
+* Added new settings for MergeTree engines (the merge_tree section in config.xml):
+  - replicated_deduplication_window_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables.
+  - cleanup_delay_period sets how often to start cleanup to remove outdated data.
+  - replicated_can_become_leader can prevent a replica from becoming the leader (and assigning merges).
+* Accelerated cleanup to remove outdated data from ZooKeeper.
+* Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed_ddl_task_timeout, which limits the time to wait for a response from the servers in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error and the request will be executed in asynchronous mode.
+* Improved display of stack traces in the server logs.
+* Added the "none" value for the compression method.
+* You can use multiple dictionaries_config sections in config.xml.
+* It is possible to connect to MySQL through a socket in the file system.
+* The system.parts table has a new column with information about the size of marks, in bytes.
+
+#### Bug fixes:
+
+* Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field.
+* Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
+* Fixed possible freezing on "leader election" when starting a server.
+* The max_replica_delay_for_distributed_queries setting was ignored when using a local replica of the data source. This has been fixed.
+* Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
+* Fixed an exception in the multiIf function when using empty arrays or strings.
+* Fixed excessive memory allocations when deserializing Native format.
+* Fixed incorrect auto-update of Trie dictionaries.
+* Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
+* Fixed a crash of GROUP BY when using distributed_aggregation_memory_efficient=1.
+* Now you can specify the database.table on the right side of IN and JOIN.
+* Too many threads were used for parallel aggregation. This has been fixed.
+* Fixed how the "if" function works with FixedString arguments.
+* SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
+* Running `CREATE VIEW IF EXISTS` no longer causes crashes.
+* Fixed incorrect behavior when input_format_skip_unknown_fields=1 is set and there are negative numbers.
+* Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
+* Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
+* Fixed an incorrect interpretation of a SELECT query from Dictionary tables.
+* Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements.
+* Fixed the failover for dictionaries with MySQL as the source.
+
+#### Improved workflow for developing and assembling ClickHouse:
+
+* Builds can be assembled in Arcadia.
+* You can use gcc 7 to compile ClickHouse.
+* Parallel builds using ccache+distcc are faster now.
+
+### ClickHouse release 1.1.54245, 2017-07-04
+
+#### New features:
+
+* Distributed DDL (for example, `CREATE TABLE ON CLUSTER`).
+* The replicated query `ALTER TABLE CLEAR COLUMN IN PARTITION`.
+* The engine for Dictionary tables (access to dictionary data in the form of a table).
+* Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
+* You can check for updates to the dictionary by sending a request to the source.
+* Qualified column names.
+* Quoting identifiers using double quotation marks.
+* Sessions in the HTTP interface.
+* The OPTIMIZE query for a Replicated table can run not only on the leader.
+
+#### Backward incompatible changes:
+
+* Removed SET GLOBAL.
+
+#### Minor changes:
+
+* Now after an alert is triggered, the log prints the full stack trace.
+* Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives).
+
+#### Bug fixes:
+
+* Fixed a bad connection "sticking" when inserting into a Distributed table.
+* GLOBAL IN now works for a query from a Merge table that looks at a Distributed table.
+* The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed.
+* Changes in how an executable source of cached external dictionaries works.
+* Fixed the comparison of strings containing null characters.
+* Fixed the comparison of Float32 primary key fields with constants.
+* Previously, an incorrect estimate of the size of a field could lead to overly large allocations.
+* Fixed a crash when querying a Nullable column added to a table using ALTER.
+* Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT.
+* Fixed an ORDER BY subquery consisting of only constant values.
+* Previously, a Replicated table could remain in the invalid state after a failed DROP TABLE.
+* Aliases for scalar subqueries with empty results are no longer lost.
+* Now a query that used compilation does not fail with an error if the .so file gets damaged.

From 1a500bad785921b1434d0b67d20acfba3165a463 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 17 Mar 2020 20:24:36 +0300
Subject: [PATCH 037/115] Update CHANGELOG.md

Move old changelog to separate files.

---
 CHANGELOG.md | 3253 --------------------------------------------------
 1 file changed, 3253 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c0667f6b8bf..bbddfd47917 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -608,3256 +608,3 @@ ### Security Fix
 * Fixed the possibility of reading directories structure in tables with `File` table engine. This fixes [#8536](https://github.com/ClickHouse/ClickHouse/issues/8536). [#8537](https://github.com/ClickHouse/ClickHouse/pull/8537) ([alexey-milovidov](https://github.com/alexey-milovidov))
-## ClickHouse release v19.17
-
-### ClickHouse release v19.17.6.36, 2019-12-27
-
-#### Bug Fix
-* Fixed potential buffer overflow in decompress. Malicious user can pass fabricated compressed data that could cause read after buffer. This issue was found by Eldar Zaitov from Yandex information security team. [#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fixed possible server crash (`std::terminate`) when the server cannot send or write data in JSON or XML format with values of String data type (that require UTF-8 validation) or when compressing result data with Brotli algorithm or in some other rare cases.
[#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed dictionaries with source from a clickhouse `VIEW`, now reading such dictionaries doesn't cause the error `There is no query`. [#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fixed checking if a client host is allowed by host_regexp specified in users.xml. [#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar)) -* `RENAME TABLE` for a distributed table now renames the folder containing inserted data before sending to shards. This fixes an issue with successive renames `tableA->tableB`, `tableC->tableA`. [#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix)) -* `range_hashed` external dictionaries created by DDL queries now allow ranges of arbitrary numeric types. [#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin)) -* Fixed `INSERT INTO table SELECT ... FROM mysql(...)` table function. [#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix)) -* Fixed segfault in `INSERT INTO TABLE FUNCTION file()` while inserting into a file which doesn't exist. Now in this case file would be created and then insert would be processed. [#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia)) -* Fixed bitmapAnd error when intersecting an aggregated bitmap and a scalar bitmap. [#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432)) -* Fixed segfault when `EXISTS` query was used without `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. [#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed return type for functions `rand` and `randConstant` in case of nullable argument. Now functions always return `UInt32` and never `Nullable(UInt32)`. [#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fixed `DROP DICTIONARY IF EXISTS db.dict`, now it doesn't throw exception if `db` doesn't exist. [#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar)) -* If a table wasn't completely dropped because of server crash, the server will try to restore and load it [#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix)) -* Fixed a trivial count query for a distributed table if there are more than two shard local table. [#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu)) -* Fixed bug that lead to a data race in DB::BlockStreamProfileInfo::calculateRowsBeforeLimit() [#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz)) -* Fixed `ALTER table MOVE part` executed immediately after merging the specified part, which could cause moving a part which the specified part merged into. Now it correctly moves the specified part. [#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Expressions for dictionaries can be specified as strings now. 
This is useful for calculation of attributes while extracting data from non-ClickHouse sources because it allows to use non-ClickHouse syntax for those expressions. [#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin)) -* Fixed a very rare race in `clickhouse-copier` because of an overflow in ZXid. [#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009)) -* Fixed the bug when after the query failed (due to "Too many simultaneous queries" for example) it would not read external tables info, and the -next request would interpret this info as the beginning of the next query causing an error like `Unknown packet from client`. [#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat)) -* Avoid null dereference after "Unknown packet X from server" [#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat)) -* Restore support of all ICU locales, add the ability to apply collations for constant expressions and add language name to system.collations table. [#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin)) -* Number of streams for read from `StorageFile` and `StorageHDFS` is now limited, to avoid exceeding the memory limit. [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin)) -* Fixed `CHECK TABLE` query for `*MergeTree` tables without key. [#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin)) -* Removed the mutation number from a part name in case there were no mutations. This removing improved the compatibility with older versions. [#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin)) -* Fixed the bug that mutations are skipped for some attached parts due to their data_version are larger than the table mutation version. [#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang)) -* Allow starting the server with redundant copies of parts after moving them to another device. [#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fixed the error "Sizes of columns doesn't match" that might appear when using aggregate function columns. [#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea)) -* Now an exception will be thrown in case of using WITH TIES alongside LIMIT BY. And now it's possible to use TOP with LIMIT BY. [#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) -* Fix dictionary reload if it has `invalidate_query`, which stopped updates and some exception on previous update tries. [#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin)) - -### ClickHouse release v19.17.4.11, 2019-11-22 - -#### Backward Incompatible Change -* Using column instead of AST to store scalar subquery results for better performance. Setting `enable_scalar_subquery_optimization` was added in 19.17 and it was enabled by default. It leads to errors like [this](https://github.com/ClickHouse/ClickHouse/issues/7851) during upgrade to 19.17.2 or 19.17.3 from previous versions. 
This setting was disabled by default in 19.17.4, to make possible upgrading from 19.16 and older versions without errors. [#7392](https://github.com/ClickHouse/ClickHouse/pull/7392) ([Amos Bird](https://github.com/amosbird)) - -#### New Feature -* Add the ability to create dictionaries with DDL queries. [#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([alesapin](https://github.com/alesapin)) -* Make `bloom_filter` type of index supporting `LowCardinality` and `Nullable` [#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Add function `isValidJSON` to check that passed string is a valid json. [#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir)) -* Implement `arrayCompact` function [#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([Memo](https://github.com/Joeywzr)) -* Created function `hex` for Decimal numbers. It works like `hex(reinterpretAsString())`, but doesn't delete last zero bytes. [#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([Mikhail Korotov](https://github.com/millb)) -* Add `arrayFill` and `arrayReverseFill` functions, which replace elements by other elements in front/back of them in the array. [#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz)) -* Add `CRC32IEEE()`/`CRC64()` support [#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat)) -* Implement `char` function similar to one in [mysql](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char) [#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundyli](https://github.com/sundy-li)) -* Add `bitmapTransform` function. It transforms an array of values in a bitmap to another array of values, the result is a new bitmap [#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([Zhichang Yu](https://github.com/yuzhichang)) -* Implemented `javaHashUTF16LE()` function [#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab)) -* Add `_shard_num` virtual column for the Distributed engine [#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat)) - -#### Experimental Feature -* Support for processors (new query execution pipeline) in `MergeTree`. [#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Bug Fix -* Fix incorrect float parsing in `Values` [#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix)) -* Fix rare deadlock which can happen when trace_log is enabled. [#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov)) -* Prevent message duplication when producing Kafka table has any MVs selecting from it [#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7)) -* Support for `Array(LowCardinality(Nullable(String)))` in `IN`. 
Resolves [#7364](https://github.com/ClickHouse/ClickHouse/issues/7364) [#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab)) -* Add handling of `SQL_TINYINT` and `SQL_BIGINT`, and fix handling of `SQL_FLOAT` data source types in ODBC Bridge. [#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon)) -* Fix aggregation (`avg` and quantiles) over empty decimal columns [#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([Andrey Konyaev](https://github.com/akonyaev90)) -* Fix `INSERT` into Distributed with `MATERIALIZED` columns [#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat)) -* Make `MOVE PARTITION` work if some parts of partition are already on destination disk or volume [#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fixed bug with hardlinks failing to be created during mutations in `ReplicatedMergeTree` in multi-disk configurations. [#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fixed a bug with a mutation on a MergeTree when whole part remains unchanged and best space is being found on another disk [#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fixed bug with `keep_free_space_ratio` not being read from disks configuration [#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fix bug with table contains only `Tuple` columns or columns with complex paths. Fixes [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([alesapin](https://github.com/alesapin)) -* Do not account memory for Buffer engine in max_memory_usage limit [#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat)) -* Fix final mark usage in `MergeTree` tables ordered by `tuple()`. In rare cases it could lead to `Can't adjust last granule` error while select. [#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([Anton Popov](https://github.com/CurtizJ)) -* Fix bug in mutations that have predicate with actions that require context (for example functions for json), which may lead to crashes or strange exceptions. [#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([alesapin](https://github.com/alesapin)) -* Fix mismatch of database and table names escaping in `data/` and `shadow/` directories [#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak)) -* Support duplicated keys in RIGHT|FULL JOINs, e.g. ```ON t.x = u.x AND t.x = u.y```. Fix crash in this case. [#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix `Not found column in block` when joining on expression with RIGHT or FULL JOIN. [#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuikov](https://github.com/4ertus2)) -* One more attempt to fix infinite loop in `PrettySpace` format [#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia)) -* Fix bug in `concat` function when all arguments were `FixedString` of the same size. 
[#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([alesapin](https://github.com/alesapin)) -* Fixed exception in case of using 1 argument while defining S3, URL and HDFS storages. [#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fix scope of the InterpreterSelectQuery for views with query [#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat)) - -#### Improvement -* `Nullable` columns recognized and NULL-values handled correctly by ODBC-bridge [#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk)) -* Write current batch for distributed send atomically [#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat)) -* Throw an exception if we cannot detect table for column name in query. [#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuikov](https://github.com/4ertus2)) -* Add `merge_max_block_size` setting to `MergeTreeSettings` [#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuikov](https://github.com/4ertus2)) -* Queries with `HAVING` and without `GROUP BY` assume group by constant. So, `SELECT 1 HAVING 1` now returns a result. [#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([Amos Bird](https://github.com/amosbird)) -* Support parsing `(X,)` as tuple similar to python. [#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([Amos Bird](https://github.com/amosbird)) -* Make `range` function behaviors almost like pythonic one. [#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundyli](https://github.com/sundy-li)) -* Add `constraints` columns to table `system.settings` [#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([Vitaly Baranov](https://github.com/vitlibar)) -* Better Null format for tcp handler, so that it's possible to use `select ignore() from table format Null` for perf measure via clickhouse-client [#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([Amos Bird](https://github.com/amosbird)) -* Queries like `CREATE TABLE ... AS (SELECT (1, 2))` are parsed correctly [#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz)) - -#### Performance Improvement -* The performance of aggregation over short string keys is improved. [#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird)) -* Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird)) -* Use storage meta info to evaluate trivial `SELECT count() FROM table;` [#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov)) -* Vectorize processing `arrayReduce` similar to Aggregator `addBatch`. [#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird)) -* Minor improvements in performance of `Kafka` consumption [#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7)) - -#### Build/Testing/Packaging Improvement -* Add support for cross-compiling to the CPU architecture AARCH64. Refactor packager script. 
[#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([Ivan](https://github.com/abyss7)) -* Unpack darwin-x86_64 and linux-aarch64 toolchains into mounted Docker volume when building packages [#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([Ivan](https://github.com/abyss7)) -* Update Docker Image for Binary Packager [#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([Ivan](https://github.com/abyss7)) -* Fixed compile errors on MacOS Catalina [#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([Ernest Poletaev](https://github.com/ernestp)) -* Some refactoring in query analysis logic: split complex class into several simple ones. [#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix build without submodules [#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller)) -* Better `add_globs` in CMake files [#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([Amos Bird](https://github.com/amosbird)) -* Remove hardcoded paths in `unwind` target [#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok)) -* Allow to use mysql format without ssl [#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller)) - -#### Other -* Added ANTLR4 grammar for ClickHouse SQL dialect [#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -## ClickHouse release v19.16 - -#### Clickhouse release v19.16.14.65, 2020-03-05 - -* Fix distributed subqueries incompatibility with older CH versions. Fixes [#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) -[(tabplubix)](https://github.com/tavplubix) -* When executing `CREATE` query, fold constant expressions in storage engine arguments. Replace empty database name with current database. Fixes [#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix check for local address in `ClickHouseDictionarySource`. -[#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tabplubix)](https://github.com/tavplubix) -* Now background merges in `*MergeTree` table engines family preserve storage policy volume order more accurately. -[#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Prevent losing data in `Kafka` in rare cases when exception happens after reading suffix but before commit. Fixes [#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) -[#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov) -* Fix bug leading to server termination when trying to use / drop `Kafka` table created with wrong parameters. Fixes [#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). -[#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov) -* Allow using `MaterializedView` with subqueries above `Kafka` tables. 
-[#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov)) - -#### New Feature -* Add `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity. -[#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) - -### ClickHouse release v19.16.2.2, 2019-10-30 - -#### Backward Incompatible Change -* Add missing arity validation for count/counIf. - [#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) -[#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir)) -* Remove legacy `asterisk_left_columns_only` setting (it was disabled by default). - [#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem -Zuikov](https://github.com/4ertus2)) -* Format strings for Template data format are now specified in files. - [#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) -([tavplubix](https://github.com/tavplubix)) - -#### New Feature -* Introduce uniqCombined64() to calculate cardinality greater than UINT_MAX. - [#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), -[#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat -Khuzhin](https://github.com/azat)) -* Support Bloom filter indexes on Array columns. - [#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) -([achimbab](https://github.com/achimbab)) -* Add a function `getMacro(name)` that returns String with the value of corresponding `` - from server configuration. [#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) -([alexey-milovidov](https://github.com/alexey-milovidov)) -* Set two configuration options for a dictionary based on an HTTP source: `credentials` and - `http-headers`. [#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume -Tassery](https://github.com/YiuRULE)) -* Add a new ProfileEvent `Merge` that counts the number of launched background merges. - [#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail -Korotov](https://github.com/millb)) -* Add fullHostName function that returns a fully qualified domain name. - [#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) -[#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundyli](https://github.com/sundy-li)) -* Add function `arraySplit` and `arrayReverseSplit` which split an array by "cut off" - conditions. They are useful in time sequence handling. -[#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz)) -* Add new functions that return the Array of all matched indices in multiMatch family of functions. - [#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila -Kutenin](https://github.com/danlark1)) -* Add a new database engine `Lazy` that is optimized for storing a large number of small -Log - tables. [#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita -Vasilev](https://github.com/nikvas0)) -* Add aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang -Yu](https://github.com/yuzhichang)) -* Add aggregate function combinators -OrNull and -OrDefault, which return null - or default values when there is nothing to aggregate. 
-[#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) -([hcz](https://github.com/hczhcz)) -* Introduce CustomSeparated data format that supports custom escaping and - delimiter rules. [#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) -([tavplubix](https://github.com/tavplubix)) -* Support Redis as source of external dictionary. [#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton -Popov](https://github.com/CurtizJ)) - -#### Bug Fix -* Fix wrong query result if it has `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is - used. [#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton -Popov](https://github.com/CurtizJ)) -* Disabled MariaDB authentication plugin, which depends on files outside of project. - [#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy -Baranov](https://github.com/yurriy)) -* Fix exception `Cannot convert column ... because it is constant but values of constants are - different in source and result` which could rarely happen when functions `now()`, `today()`, -`yesterday()`, `randConstant()` are used. -[#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai -Kochetov](https://github.com/KochetovNicolai)) -* Fixed issue of using HTTP keep alive timeout instead of TCP keep alive timeout. - [#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily -Nemkov](https://github.com/Enmk)) -* Fixed a segmentation fault in groupBitmapOr (issue [#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). - [#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang -Yu](https://github.com/yuzhichang)) -* For materialized views the commit for Kafka is called after all data were written. - [#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7)) -* Fixed wrong `duration_ms` value in `system.part_log` table. It was ten times off. - [#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir -Chebotarev](https://github.com/excitoon)) -* A quick fix to resolve crash in LIVE VIEW table and re-enabling all LIVE VIEW tests. - [#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) -([vzakaznikov](https://github.com/vzakaznikov)) -* Serialize NULL values correctly in min/max indexes of MergeTree parts. - [#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander -Kuzmenkov](https://github.com/akuzm)) -* Don't put virtual columns to .sql metadata when table is created as `CREATE TABLE AS`. - [#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7)) -* Fix segmentation fault in `ATTACH PART` query. - [#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) -([alesapin](https://github.com/alesapin)) -* Fix wrong result for some queries given by the optimization of empty IN subqueries and empty - INNER/RIGHT JOIN. [#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai -Kochetov](https://github.com/KochetovNicolai)) -* Fixing AddressSanitizer error in the LIVE VIEW getHeader() method. - [#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) -([vzakaznikov](https://github.com/vzakaznikov)) - -#### Improvement -* Add a message in case of queue_wait_max_ms wait takes place. - [#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat -Khuzhin](https://github.com/azat)) -* Made setting `s3_min_upload_part_size` table-level. 
- [#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir -Chebotarev](https://github.com/excitoon)) -* Check TTL in StorageFactory. [#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) - ([sundyli](https://github.com/sundy-li)) -* Squash left-hand blocks in partial merge join (optimization). - [#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem -Zuikov](https://github.com/4ertus2)) -* Do not allow non-deterministic functions in mutations of Replicated table engines, because this - can introduce inconsistencies between replicas. -[#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander -Kazakov](https://github.com/Akazz)) -* Disable memory tracker while converting exception stack trace to string. It can prevent the loss - of error messages of type `Memory limit exceeded` on server, which caused the `Attempt to read -after eof` exception on client. [#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) -([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Miscellaneous format improvements. Resolves - [#6033](https://github.com/ClickHouse/ClickHouse/issues/6033), -[#2633](https://github.com/ClickHouse/ClickHouse/issues/2633), -[#6611](https://github.com/ClickHouse/ClickHouse/issues/6611), -[#6742](https://github.com/ClickHouse/ClickHouse/issues/6742) -[#7215](https://github.com/ClickHouse/ClickHouse/pull/7215) -([tavplubix](https://github.com/tavplubix)) -* ClickHouse ignores values on the right side of IN operator that are not convertible to the left - side type. Make it work properly for compound types -- Array and Tuple. -[#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander -Kuzmenkov](https://github.com/akuzm)) -* Support missing inequalities for ASOF JOIN. It's possible to join less-or-equal variant and strict - greater and less variants for ASOF column in ON syntax. -[#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem -Zuikov](https://github.com/4ertus2)) -* Optimize partial merge join. [#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) - ([Artem Zuikov](https://github.com/4ertus2)) -* Do not use more than 98K of memory in uniqCombined functions. - [#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), -[#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat -Khuzhin](https://github.com/azat)) -* Flush parts of right-hand joining table on disk in PartialMergeJoin (if there is not enough - memory). Load data back when needed. [#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) -([Artem Zuikov](https://github.com/4ertus2)) - -#### Performance Improvement -* Speed up joinGet with const arguments by avoiding data duplication. - [#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos -Bird](https://github.com/amosbird)) -* Return early if the subquery is empty. - [#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu)) -* Optimize parsing of SQL expression in Values. - [#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) -([tavplubix](https://github.com/tavplubix)) - -#### Build/Testing/Packaging Improvement -* Disable some contribs for cross-compilation to Mac OS. - [#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7)) -* Add missing linking with PocoXML for clickhouse_common_io. - [#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat -Khuzhin](https://github.com/azat)) -* Accept multiple test filter arguments in clickhouse-test. 
- [#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander -Kuzmenkov](https://github.com/akuzm)) -* Enable musl and jemalloc for ARM. [#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) - ([Amos Bird](https://github.com/amosbird)) -* Added `--client-option` parameter to `clickhouse-test` to pass additional parameters to client. - [#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai -Kochetov](https://github.com/KochetovNicolai)) -* Preserve existing configs on rpm package upgrade. - [#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) -([filimonov](https://github.com/filimonov)) -* Fix errors detected by PVS. [#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem - Zuikov](https://github.com/4ertus2)) -* Fix build for Darwin. [#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) - ([Ivan](https://github.com/abyss7)) -* glibc 2.29 compatibility. [#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos - Bird](https://github.com/amosbird)) -* Make sure dh_clean does not touch potential source files. - [#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos -Bird](https://github.com/amosbird)) -* Attempt to avoid conflict when updating from altinity rpm - it has config file packaged separately - in clickhouse-server-common. [#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) -([filimonov](https://github.com/filimonov)) -* Optimize some header files for faster rebuilds. - [#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), -[#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander -Kuzmenkov](https://github.com/akuzm)) -* Add performance tests for Date and DateTime. [#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily - Nemkov](https://github.com/Enmk)) -* Fix some tests that contained non-deterministic mutations. - [#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander -Kazakov](https://github.com/Akazz)) -* Add build with MemorySanitizer to CI. [#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) - ([Alexander Kuzmenkov](https://github.com/akuzm)) -* Avoid use of uninitialized values in MetricsTransmitter. - [#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat -Khuzhin](https://github.com/azat)) -* Fix some issues in Fields found by MemorySanitizer. - [#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), -[#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander -Kuzmenkov](https://github.com/akuzm)), [#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) -([Amos Bird](https://github.com/amosbird)) -* Fix undefined behavior in murmurhash32. [#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos - Bird](https://github.com/amosbird)) -* Fix undefined behavior in StoragesInfoStream. [#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) - ([tavplubix](https://github.com/tavplubix)) -* Fixed constant expressions folding for external database engines (MySQL, ODBC, JDBC). In previous - versions it wasn't working for multiple constant expressions and was not working at all for Date, -DateTime and UUID. This fixes [#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) -[#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) -([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixing ThreadSanitizer data race error in the LIVE VIEW when accessing no_users_thread variable. 
- [#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) -([vzakaznikov](https://github.com/vzakaznikov)) -* Get rid of malloc symbols in libcommon - [#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), -[#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos -Bird](https://github.com/amosbird)) -* Add global flag ENABLE_LIBRARIES for disabling all libraries. - [#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) -([proller](https://github.com/proller)) - -#### Code cleanup -* Generalize configuration repository to prepare for DDL for Dictionaries. [#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) - ([alesapin](https://github.com/alesapin)) -* Parser for dictionaries DDL without any semantic. - [#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) -([alesapin](https://github.com/alesapin)) -* Split ParserCreateQuery into different smaller parsers. - [#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) -([alesapin](https://github.com/alesapin)) -* Small refactoring and renaming near external dictionaries. - [#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) -([alesapin](https://github.com/alesapin)) -* Refactor some code to prepare for role-based access control. [#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly - Baranov](https://github.com/vitlibar)) -* Some improvements in DatabaseOrdinary code. - [#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita -Vasilev](https://github.com/nikvas0)) -* Do not use iterators in find() and emplace() methods of hash tables. -[#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander -Kuzmenkov](https://github.com/akuzm)) -* Fix getMultipleValuesFromConfig in case when parameter root is not empty. [#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) -([Mikhail Korotov](https://github.com/millb)) -* Remove some copy-paste (TemporaryFile and TemporaryFileStream) - [#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem -Zuikov](https://github.com/4ertus2)) -* Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`). - [#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir -Chebotarev](https://github.com/excitoon)) -* Wait for all scheduled jobs, which are using local objects, if `ThreadPool::schedule(...)` throws - an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and -fix comments to make obvious that it may throw. -[#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) -([tavplubix](https://github.com/tavplubix)) - -## ClickHouse release 19.15 - -### ClickHouse release 19.15.4.10, 2019-10-31 - -#### Bug Fix -* Added handling of SQL_TINYINT and SQL_BIGINT, and fix handling of SQL_FLOAT data source types in ODBC Bridge. -[#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon)) -* Allowed to have some parts on destination disk or volume in MOVE PARTITION. -[#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fixed NULL-values in nullable columns through ODBC-bridge. -[#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk)) -* Fixed INSERT into Distributed non local node with MATERIALIZED columns. -[#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat)) -* Fixed function getMultipleValuesFromConfig. 
-[#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
-* Fixed issue of using HTTP keep alive timeout instead of TCP keep alive timeout.
-[#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
-* Wait for all jobs to finish on exception (fixes rare segfaults).
-[#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
-* Don't push to MVs when inserting into Kafka table.
-[#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
-* Disable memory tracker for exception stack.
-[#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-* Fixed bad code in transforming query for external database.
-[#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Avoid use of uninitialized values in MetricsTransmitter.
-[#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
-* Added example config with macros for tests ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.15.3.6, 2019-10-09
-
-#### Bug Fix
-* Fixed bad_variant in hashed dictionary.
-([alesapin](https://github.com/alesapin))
-* Fixed a bug with segmentation fault in ATTACH PART query.
-([alesapin](https://github.com/alesapin))
-* Fixed time calculation in `MergeTreeData`.
-([Vladimir Chebotarev](https://github.com/excitoon))
-* Commit to Kafka explicitly after the writing is finalized.
-[#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
-* Serialize NULL values correctly in min/max indexes of MergeTree parts.
-[#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
-
-### ClickHouse release 19.15.2.2, 2019-10-01
-
-#### New Feature
-* Tiered storage: support to use multiple storage volumes for tables with MergeTree engine. It's possible to store fresh data on SSD and automatically move old data to HDD. ([example](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Igr](https://github.com/ObjatieGroba)) [#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([alesapin](https://github.com/alesapin))
-* Add table function `input` for reading incoming data in `INSERT SELECT` query (see the sketch below). [#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([palasonic1](https://github.com/palasonic1)) [#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Anton Popov](https://github.com/CurtizJ))
-* Add a `sparse_hashed` dictionary layout, that is functionally equivalent to the `hashed` layout, but is more memory efficient. It uses about half as much memory at the cost of slower value retrieval. [#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Azat Khuzhin](https://github.com/azat))
-* Implement the ability to define a list of users for access to dictionaries. Only the currently connected database is used. [#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Guillaume Tassery](https://github.com/YiuRULE))
-* Add `LIMIT` option to `SHOW` query. [#6944](https://github.com/ClickHouse/ClickHouse/pull/6944) ([Philipp Malkovsky](https://github.com/malkfilipp))
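A minimal sketch of the `input` table function mentioned above, used to transform rows on the fly during `INSERT SELECT`; the table and column names here are hypothetical:

```sql
-- Hypothetical target table.
CREATE TABLE events (id UInt32, name_lower String) ENGINE = MergeTree() ORDER BY id;

-- Raw rows are sent together with the INSERT (e.g. piped into clickhouse-client)
-- and are transformed by the SELECT before being written.
INSERT INTO events
SELECT id, lower(name)
FROM input('id UInt32, name String')
FORMAT CSV
```

The raw data would typically be piped in, e.g. `clickhouse-client --query="INSERT INTO events SELECT ... FORMAT CSV" < data.csv`.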
-* Add `bitmapSubsetLimit(bitmap, range_start, limit)` function, that returns a subset of the set with at most `limit` of its smallest values that are not smaller than `range_start`. [#6957](https://github.com/ClickHouse/ClickHouse/pull/6957) ([Zhichang Yu](https://github.com/yuzhichang))
-* Add `bitmapMin` and `bitmapMax` functions. [#6970](https://github.com/ClickHouse/ClickHouse/pull/6970) ([Zhichang Yu](https://github.com/yuzhichang))
-* Add function `repeat` related to [issue-6648](https://github.com/ClickHouse/ClickHouse/issues/6648) [#6999](https://github.com/ClickHouse/ClickHouse/pull/6999) ([flynn](https://github.com/ucasFL))
-
-#### Experimental Feature
-* Implement (in memory) Merge Join variant that does not change current pipeline. Result is partially sorted by merge key. Set `partial_merge_join = 1` to use this feature. The Merge Join is still in development. [#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2))
-* Add `S3` engine and table function. It is still in development (no authentication support yet). [#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon))
-
-#### Improvement
-* Every message read from Kafka is inserted atomically. This resolves almost all known issues with Kafka engine. [#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7))
-* Improvements for failover of Distributed queries. Shorten recovery time; it is also now configurable and can be seen in `system.clusters`. [#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk))
-* Support numeric values for Enums directly in `IN` section. #6766 [#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000))
-* Support (optional, disabled by default) redirects on URL storage. [#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll))
-* Add information message when client with an older version connects to a server. [#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp))
-* Remove maximum backoff sleep time limit for sending data in Distributed tables [#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat))
-* Add ability to send profile events (counters) with cumulative values to graphite. It can be enabled in the `graphite` section of the server `config.xml`. [#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat))
-* Automatically cast type `T` to `LowCardinality(T)` while inserting data into a column of type `LowCardinality(T)` in Native format via HTTP. [#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-* Add ability to use function `hex` without using `reinterpretAsString` for `Float32`, `Float64`. [#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb))
-
-#### Build/Testing/Packaging Improvement
-* Add gdb-index to clickhouse binary with debug info. It will speed up startup time of `gdb`. [#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin))
-* Speed up deb packaging with patched dpkg-deb which uses `pigz`.
[#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin))
-* Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel))
-* Add split build smoke test in CI. [#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin))
-* Add build with MemorySanitizer to CI. [#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
-* Replace `libsparsehash` with `sparsehash-c11` [#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat))
-
-#### Bug Fix
-* Fixed performance degradation of index analysis on complex keys on large tables. This fixes #6924. [#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fix logical error causing segfaults when selecting from Kafka empty topic. [#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
-* Fix too early MySQL connection close in `MySQLBlockInputStream.cpp`. [#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez))
-* Returned support for very old Linux kernels (fix [#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)) [#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fix possible data loss in `insert select` query in case of empty block in input stream. #6834 #6862 [#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-* Fix for function `arrayEnumerateUniqRanked` with empty arrays in params [#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
-* Fix complex queries with array joins and global subqueries. [#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
-* Fix `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs [#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
-* Fixed `MSan` warning while executing function with `LowCardinality` argument. [#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-
-#### Backward Incompatible Change
-* Changed serialization format of bitmap* aggregate function states to improve performance. Serialized states of bitmap* from previous versions cannot be read. [#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang))
-
-## ClickHouse release 19.14
-### ClickHouse release 19.14.7.15, 2019-10-02
-
-#### Bug Fix
-* This release also contains all bug fixes from 19.11.12.69.
-* Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [#7068](https://github.com/ClickHouse/ClickHouse/issues/7068). [#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.14.6.12, 2019-09-19
-
-#### Bug Fix
-* Fix for function `arrayEnumerateUniqRanked` with empty arrays in params.
[#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller)) -* Fixed subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with alias. Use subquery alias for external table name if it is specified. [#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7)) - -#### Build/Testing/Packaging Improvement -* Fix [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it to a shell scripts because it needs to wait for mutations to apply. [#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz)) -* Fixed UBSan and MemSan failure in function `groupUniqArray` with emtpy array argument. It was caused by placing of empty `PaddedPODArray` into hash table zero cell because constructor for zero cell value was not called. [#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird)) - -### ClickHouse release 19.14.3.3, 2019-09-10 - -#### New Feature -* `WITH FILL` modifier for `ORDER BY`. (continuation of [#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ)) -* `WITH TIES` modifier for `LIMIT`. (continuation of [#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ)) -* Parse unquoted `NULL` literal as NULL (if setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if data type of this field is not nullable (if setting `input_format_null_as_default=1`). [#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix)) -* Support for wildcards in paths of table functions `file` and `hdfs`. If the path contains wildcards, the table will be readonly. Example of usage: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`. [#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia)) -* New `system.metric_log` table which stores values of `system.events` and `system.metrics` with specified time interval. [#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Allow to write ClickHouse text logs to `system.text_log` table. [#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Show private symbols in stack traces (this is done via parsing symbol tables of ELF files). Added information about file and line number in stack traces if debug info is present. Speedup symbol name lookup with indexing symbols present in program. 
Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed function `symbolizeAddress` to `addressToSymbol` for consistency. Function `addressToSymbol` will return mangled name for performance reasons and you have to apply `demangle`. Added setting `allow_introspection_functions` which is turned off by default. [#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Table function `values` (the name is case-insensitive). It allows to read from `VALUES` list proposed in [#5984](https://github.com/ClickHouse/ClickHouse/issues/5984). Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000)) -* Added an ability to alter storage settings. Syntax: `ALTER TABLE
<table> MODIFY SETTING <setting_name> = <value>`. [#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin))
-* Support for removal of detached parts. Syntax: `ALTER TABLE <table_name> DROP DETACHED PART '<part_id>'`. [#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix))
-* Table constraints. Allow adding a constraint to the table definition that will be checked at insert. [#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Support for cascaded materialized views. [#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird))
-* Turn on query profiler by default to sample every query execution thread once a second. [#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Input format `ORC`. [#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90))
-* Added two new functions: `sigmoid` and `tanh` (that are useful for machine learning applications). [#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Function `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if given token is in haystack. Token is a maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack). Token must be a constant string. Supported by tokenbf_v1 index specialization. [#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk))
-* New function `neighbor(value, offset[, default_value])`. Allows reaching the previous/next value within a column in a block of data. [#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov)
-* Created a function `currentUser()`, returning login of authorized user. Added alias `user()` for compatibility with MySQL. [#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash))
-* New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive` which were proposed in [#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000))
-* Function `bitmapRange(bitmap, range_begin, range_end)` which returns a new set with the specified range (not including the `range_end`). [#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang))
-* Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates an array of precision-long strings of geohash boxes covering the provided area.
[#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk)) -* Implement support for INSERT query with `Kafka` tables. [#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7)) -* Added support for `_partition` and `_timestamp` virtual columns to Kafka engine. [#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7)) -* Possibility to remove sensitive data from `query_log`, server logs, process list with regexp-based rules. [#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov)) - -#### Experimental Feature -* Input and output data format `Template`. It allows to specify custom format string for input and output. [#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix)) -* Implementation of `LIVE VIEW` tables that were originally proposed in [#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and then updated in [#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for detailed description. [#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that `LIVE VIEW` feature may be removed in next versions. - -#### Bug Fix -* This release also contains all bug fixes from 19.13 and 19.11. -* Fix segmentation fault when the table has skip indices and vertical merge happens. [#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin)) -* Fix per-column TTL with non-trivial column defaults. Previously in case of force TTL merge with `OPTIMIZE ... FINAL` query, expired values was replaced by type defaults instead of user-specified column defaults. [#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ)) -* Fix Kafka messages duplication problem on normal server restart. [#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7)) -* Fixed infinite loop when reading Kafka messages. Do not pause/resume consumer on subscription at all - otherwise it may get paused indefinitely in some scenarios. [#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7)) -* Fix `Key expression contains comparison between inconvertible types` exception in `bitmapContains` function. [#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000)) -* Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ)) -* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happed due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. 
Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Removed extra verbose logging in MySQL interface [#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Return the ability to parse boolean settings from 'true' and 'false' in the configuration file. [#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin)) -* Fix crash in `quantile` and `median` function over `Nullable(Decimal128)`. [#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed possible incomplete result returned by `SELECT` query with `WHERE` condition on primary key contained conversion to Float type. It was caused by incorrect checking of monotonicity in `toFloat` function. [#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000)) -* Check `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014)) -* Fix JOIN results for key columns when used with `join_use_nulls`. Attach Nulls instead of columns defaults. [#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix for skip indices with vertical merge and alter. Fix for `Bad size of marks file` exception. [#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin)) -* Fix rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows) [#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin)) -* Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregated functions. [#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ)) -* Fixed unsafe code around `getIdentifier` function. [#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed bug in MySQL wire protocol (is used while connecting to ClickHouse form MySQL client). Caused by heap buffer overflow in `PacketPayloadWriteBuffer`. [#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuriy Baranov](https://github.com/yurriy)) -* Fixed memory leak in `bitmapSubsetInRange` function. [#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang)) -* Fix rare bug when mutation executed after granularity change. [#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin)) -* Allow protobuf message with all fields by default. 
[#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar)) -* Resolve a bug with `nullIf` function when we send a `NULL` argument on the second argument. [#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Fix rare bug with wrong memory allocation/deallocation in complex key cache dictionaries with string fields which leads to infinite memory consumption (looks like memory leak). Bug reproduces when string size was a power of two starting from eight (8, 16, 32, etc). [#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin)) -* Fixed Gorilla encoding on small sequences which caused exception `Cannot write after end of buffer`. [#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk)) -* Allow to use not nullable types in JOINs with `join_use_nulls` enabled. [#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2)) -* Disable `Poco::AbstractConfiguration` substitutions in query in `clickhouse-client`. [#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Avoid deadlock in `REPLACE PARTITION`. [#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Using `arrayReduce` for constant arguments may lead to segfault. [#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix inconsistent parts which can appear if replica was restored after `DROP PARTITION`. [#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) -* Fixed hang in `JSONExtractRaw` function. [#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix bug with incorrect skip indices serialization and aggregation with adaptive granularity. [#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin)) -* Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ)) -* Fix bug with writing secondary indices marks with adaptive granularity. [#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin)) -* Fix initialization order while server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()` the `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7)) -* Clearing the data buffer from the previous read operation that was completed with an error. [#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) -* Fix bug with enabling adaptive granularity when creating a new replica for Replicated\*MergeTree table. 
[#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
-* Fixed possible crash during server startup in case of an exception happening in `libunwind` during an exception at access to the uninitialized `ThreadStatus` structure. [#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
-* Fix crash in `yandexConsistentHash` function. Found by fuzz test. [#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fixed the possibility of hanging queries when server is overloaded and global thread pool becomes near full. This has a higher chance of happening on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may reproduce if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fixed logic of `arrayEnumerateUniqRanked` function. [#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fix segfault when decoding symbol table. [#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird))
-* Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to not-Nullable column in case it doesn't contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-* Removed extra quoting of description in `system.settings` table. [#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Avoid possible deadlock in `TRUNCATE` of Replicated table. [#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fix reading in order of sorting key. [#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ))
-* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
-* Fix bug opened by [#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0). Reproduces in queries to Distributed tables over MergeTree tables when we don't query any columns (`SELECT 1`). [#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
-* Fixed overflow in integer division of signed type to unsigned type. The behaviour was exactly as in C or C++ language (integer promotion rules), which may be surprising. Please note that the overflow is still possible when dividing a large signed number by a large unsigned number or vice-versa (but that case is less usual). The issue existed in all server versions.
[#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Limit maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed issues about using `MATERIALIZED` columns and aliases in `MaterializedView`. [#448](https://github.com/ClickHouse/ClickHouse/issues/448) [#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix `FormatFactory` behaviour for input streams which are not implemented as processor. [#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fixed typo. [#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin)) -* Typo in the error message ( is -> are ). [#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane)) -* Fixed error while parsing of columns list from string if type contained a comma (this issue was relevant for `File`, `URL`, `HDFS` storages) [#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000)) - -#### Security Fix -* This release also contains all bug security fixes from 19.13 and 19.11. -* Fixed the possibility of a fabricated query to cause server crash due to stack overflow in SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvement -* Correct implementation of ternary logic for `AND/OR`. [#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz)) -* Now values and rows with expired TTL will be removed after `OPTIMIZE ... FINAL` query from old parts without TTL infos or with outdated TTL infos, e.g. after `ALTER ... MODIFY TTL` query. Added queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assign merges with TTL and filter expired values in all merges. [#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ)) -* Possibility to change the location of ClickHouse history file for client using `CLICKHOUSE_HISTORY_FILE` env. [#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov)) -* Remove `dry_run` flag from `InterpreterSelectQuery`. ... [#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Support `ASOF JOIN` with `ON` section. 
[#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2)) -* Better support of skip indexes for mutations and replication. Support for `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indices that use column `x`. [#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0)) -* Allow to `ATTACH` live views (for example, at the server startup) regardless to `allow_experimental_live_view` setting. [#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* For stack traces gathered by query profiler, do not include stack frames generated by the query profiler itself. [#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Now table functions `values`, `file`, `url`, `hdfs` have support for ALIAS columns. [#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Throw an exception if `config.d` file doesn't have the corresponding root element as the config file. [#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000)) -* Print extra info in exception message for `no space left on device`. [#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix)) -* When determining shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1) ClickHouse now checks conditions from both `prewhere` and `where` clauses of select statement. [#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz)) -* Enabled `SIMDJSON` for machines without AVX2 but with SSE 4.2 and PCLMUL instruction set. [#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Support push down predicate for final subquery. [#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Better `JOIN ON` keys extraction [#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2)) -* Upated `SIMDJSON`. [#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Optimize selecting of smallest column for `SELECT count()` query. [#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird)) -* Added `strict` parameter in `windowFunnel()`. When the `strict` is set, the `windowFunnel()` applies conditions only for the unique values. 
[#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab))
-* Safer interface of `mysqlxx::Pool`. [#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev))
-* The width of the options list printed by the `--help` option now corresponds to the terminal size. [#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000))
-* Disable "read in order" optimization for aggregation without keys. [#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ))
-* HTTP status code for `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from default `500 Internal Server Error` to `400 Bad Request`. [#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin))
-* Move Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about `Join` class anymore. Its logic is hidden behind the `AnalyzedJoin` interface. [#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2))
-* Fixed possible deadlock of distributed queries when one of shards is localhost but the query is sent via network connection. [#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Changed semantics of multiple tables `RENAME` to avoid possible deadlocks. [#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Rewritten MySQL compatibility server to prevent loading full packet payload in memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy))
-* Move AST alias interpreting logic out of the parser, which doesn't have to know anything about query semantics. [#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2))
-* Slightly safer parsing of `NamesAndTypesList`. [#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* `clickhouse-copier`: Allow using `where_condition` from config with `partition_key` alias in the query for checking partition existence (earlier it was used only in queries for reading data). [#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller))
-* Added an optional message argument to `throwIf` (see the example below). ([#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir))
-* A server exception received while sending INSERT data is now processed on the client side as well. [#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000))
-* Added a metric `DistributedFilesToInsert` that shows the total number of files in the filesystem that are selected for sending to remote servers by Distributed tables. The number is summed across all shards. [#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov))
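A minimal sketch of the optional message argument to `throwIf` mentioned above; the condition and the message text are made up for illustration:

```sql
-- Throws an exception with the custom message as soon as the condition becomes true.
SELECT throwIf(number = 3, 'Unexpected value: 3') FROM numbers(10);
```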
-* Move most of JOINs prepare logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2))
-* Fix TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) 'lock-order-inversion'. [#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk))
-* Better information messages about lack of Linux capabilities. Logging fatal errors with "fatal" level, which will make them easier to find in `system.text_log`. [#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* When dumping temporary data to disk was enabled to restrict memory usage during `GROUP BY` and `ORDER BY`, the free disk space was not checked. The fix adds a new setting `min_free_disk_space`: when the free disk space is smaller than the threshold, the query will stop and throw `ErrorCodes::NOT_ENOUGH_SPACE`. [#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Removed recursive rwlock by thread. It makes no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold a lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This will lead to false "Attempt to acquire exclusive lock recursively" messages. [#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Split `ExpressionAnalyzer.appendJoin()`. Prepare a place in `ExpressionAnalyzer` for `MergeJoin`. [#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2))
-* Added `mysql_native_password` authentication plugin to MySQL compatibility server. [#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy))
-* Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (insignificant issue). [#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Move `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` makes `required_source_columns` itself now. [#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2))
-* Add setting `joined_subquery_requires_alias` to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs); see the example below. [#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2))
-* Extract `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2))
-* `system.query_log`: change data type of `type` column to `Enum`. [#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
-* Static linking of `sha256_password` authentication plugin. [#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy))
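A small sketch of the `joined_subquery_requires_alias` behaviour described above, under the assumption that the setting simply rejects unaliased subqueries in a multi-table `FROM`; the queries are illustrative only:

```sql
SET joined_subquery_requires_alias = 1;

-- Rejected with this setting enabled: the subqueries in FROM carry no aliases.
-- SELECT * FROM (SELECT 1 AS x) JOIN (SELECT 1 AS x) USING x;

-- Accepted: both subqueries are explicitly aliased.
SELECT * FROM (SELECT 1 AS x) AS t1 JOIN (SELECT 1 AS x) AS t2 USING x;
```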
[#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy)) -* Avoid extra dependency for the setting `compile` to work. In previous versions, the user may get errors like `cannot open crti.o`, `unable to find library -lc` etc. [#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* More validation of the input that may come from a malicious replica. [#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Now `clickhouse-obfuscator` file is available in `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000)) -* Fixed deadlock when we have at least two queries that read at least two tables in different order and another query that performs a DDL operation on one of the tables. Fixed another very rare deadlock. [#6764](https://github.com/ClickHouse/ClickHouse/pull/6764) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as a default authentication plugin (described in [#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy)) -* Remove unneeded place with changed nullability columns. [#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2)) -* Set default value of `queue_max_wait_ms` to zero, because current value (five seconds) makes no sense. There are rare circumstances when this setting has any use. Added settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Extract `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Keep the last one for non-select queries. [#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2)) -* Removed duplicating input and output formats. [#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Allow user to override `poll_interval` and `idle_connection_timeout` settings on connection. [#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that parts are dropped completely when all the rows in them have expired. [#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin)) -* Type checks for set index functions. Throw an exception if a function got a wrong type. This fixes fuzz test with UBSan.
[#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0)) - -#### Performance Improvement -* Optimize queries with `ORDER BY expressions` clause, where `expressions` have a coinciding prefix with the sorting key in `MergeTree` tables. This optimization is controlled by `optimize_read_in_order` setting. [#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ)) -* Allow to use multiple threads during parts loading and removal. [#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Implemented batch variant of updating aggregate function states. It may lead to performance benefits. [#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Using `FastOps` library for functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of `exp` and `log` functions more than 6 times. The functions `exp` and `log` from `Float32` argument will return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of `exp` and `log` functions may not be the nearest machine representable number to the true answer. [#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using Danila Kutenin's variant to make fastops work [#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Disable consecutive key optimization for `UInt8/16`. [#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm)) -* Improved performance of `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar)) -* Pre-fault pages when allocating memory with `mmap()`. [#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm)) -* Fix performance bug in `Decimal` comparison. [#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Build/Testing/Packaging Improvement -* Remove Compiler (runtime template instantiation) because we've won over its performance. [#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added performance test to show degradation of performance in gcc-9 in a more isolated way. [#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added table function `numbers_mt`, which is a multithreaded version of `numbers`. Updated performance tests with hash functions. [#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Comparison mode in `clickhouse-benchmark` [#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000)) -* Best effort for printing stack traces.
Also added `SIGPROF` as a debugging signal to print stack trace of a running thread. [#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Every function in its own file, part 10. [#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Remove doubled const `TABLE_IS_READ_ONLY`. [#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov)) -* Formatting changes for `StringHashMap` PR [#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm)) -* Better subquery for join creation in `ExpressionAnalyzer`. [#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2)) -* Remove a redundant condition (found by PVS Studio). [#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm)) -* Separate the hash table interface for `ReverseIndex`. [#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm)) -* Refactoring of settings. [#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin)) -* Add comments for `set` index functions. [#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0)) -* Increase OOM score in debug version on Linux. [#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm)) -* HDFS HA now work in debug build. [#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu)) -* Added a test to `transform_query_for_external_database`. [#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add test for multiple materialized views for Kafka table. [#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7)) -* Make a better build scheme. [#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7)) -* Fixed `test_external_dictionaries` integration in case it was executed under non root user. [#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* The bug reproduces when total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy)) -* Added a test for `RENAME` table race condition [#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Avoid data race on Settings in `KILL QUERY`. [#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add integration test for handling errors by a cache dictionary. [#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar)) -* Disable parsing of ELF object files on Mac OS, because it makes no sense. [#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Attempt to make changelog generator better. [#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Adding `-Wshadow` switch to the GCC. 
[#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg)) -* Removed obsolete code for `mimalloc` support. [#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `zlib-ng` determines x86 capabilities and saves this info to global variables. This is done in the `deflateInit` call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it on library startup. [#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm)) -* Regression test for a bug in JOIN which was fixed in [#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev)) -* Fixed MSan report. [#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix flapping TTL test. [#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ)) -* Fixed false data race in `MergeTreeDataPart::is_frozen` field. [#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed timeouts in fuzz test. In the previous version, it managed to find a false hangup in query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added debug checks to `static_cast` of columns. [#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Support for Oracle Linux in official RPM packages. [#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Changed json perftests from `once` to `loop` type. [#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* `odbc-bridge.cpp` defines `main()` so it should not be included in `clickhouse-lib`. [#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej)) -* Test for crash in `FULL|RIGHT JOIN` with nulls in right table's keys. [#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2)) -* Added a test for the limit on expansion of aliases just in case. [#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Switched from `boost::filesystem` to `std::filesystem` where appropriate. [#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added RPM packages to website. [#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add a test for fixed `Unknown identifier` exception in `IN` section. [#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2)) -* Simplify `shared_ptr_helper` because people face difficulties understanding it. [#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added performance tests for fixed Gorilla and DoubleDelta codec.
[#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk)) -* Split the integration test `test_dictionaries` into 4 separate tests. [#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix PVS-Studio warning in `PipelineExecutor`. [#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Allow to use `library` dictionary source with ASan. [#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added option to generate changelog from a list of PRs. [#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Lock the `TinyLog` storage when reading. [#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm)) -* Check for broken symlinks in CI. [#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Increase timeout for "stack overflow" test because it may take a long time in debug build. [#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added a check for double whitespaces. [#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix `new/delete` memory tracking when build with sanitizers. Tracking is not clear. It only prevents memory limit exceptions in tests. [#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2)) -* Enable back the check of undefined symbols while linking. [#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7)) -* Avoid rebuilding `hyperscan` every day. [#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed UBSan report in `ProtobufWriter`. [#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Don't allow to use query profiler with sanitizers because it is not compatible. [#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add test for reloading a dictionary after fail by timer. [#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix inconsistency in `PipelineExecutor::prepareProcessor` argument type. [#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Added a test for bad URIs. [#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added more checks to `CAST` function. This should get more information about segmentation fault in fuzzy test. [#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Added `gcc-9` support to `docker/builder` container that builds image locally. [#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn)) -* Test for primary key with `LowCardinality(String)`. 
[#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000)) -* Fixed tests affected by slow stack traces printing. [#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add a test case for crash in `groupUniqArray` fixed in [#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). [#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm)) -* Fixed indices mutations tests. [#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0)) -* In performance test, do not read query log for queries we didn't run. [#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm)) -* Materialized view now could be created with any low cardinality types regardless to the setting about suspicious low cardinality types. [#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia)) -* Updated tests for `send_logs_level` setting. [#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix build under gcc-8.2. [#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober)) -* Fix build with internal libc++. [#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7)) -* Fix shared build with `rdkafka` library [#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7)) -* Fixes for Mac OS build (incomplete). [#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([alex-zaitsev](https://github.com/alex-zaitsev)) -* Fix "splitted" build. [#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Other build fixes: [#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller)) - -#### Backward Incompatible Change -* Removed rarely used table function `catBoostPool` and storage `CatBoostPool`. If you have used this table function, please write email to `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Disable `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set `any_join_distinct_right_table_keys` setting to enable them. 
[#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2)) - -## ClickHouse release 19.13 -### ClickHouse release 19.13.6.51, 2019-10-02 - -#### Bug Fix -* This release also contains all bug fixes from 19.11.12.69. - -### ClickHouse release 19.13.5.44, 2019-09-20 - -#### Bug Fix -* This release also contains all bug fixes from 19.14.6.12. -* Fixed possible inconsistent state of table while executing `DROP` query for replicated table when ZooKeeper is not accessible. [#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) -* Fix for data race in StorageMerge [#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix bug introduced in query profiler which leads to endless recv from socket. [#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin)) -* Fix excessive CPU usage while executing `JSONExtractRaw` function over a boolean value. [#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fixes the regression while pushing to materialized view. [#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7)) -* Table function `url` had a vulnerability that allowed the attacker to inject arbitrary HTTP headers in the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix useless `AST` check in Set index. [#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0)) -* Fixed parsing of `AggregateFunction` values embedded in query. [#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang)) -* Fixed wrong behaviour of `trim` functions family. [#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.13.4.32, 2019-09-10 - -#### Bug Fix -* This release also contains all security bug fixes from 19.11.9.52 and 19.11.10.54. -* Fixed data race in `system.parts` table and `ALTER` query. [#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed mismatched header in streams that happened in case of reading from an empty distributed table with sample and prewhere. [#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([Lixiang Qian](https://github.com/fancyqlx)) [#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fixed crash when using `IN` clause with a subquery with a tuple. [#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) -* Fix case with same column names in `GLOBAL JOIN ON` section.
[#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix crash when casting types to `Decimal` that do not support it. Throw exception instead. [#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed crash in `extractAll()` function. [#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2)) -* Query transformation for `MySQL`, `ODBC`, `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. [#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000)) -* Added previous declaration checks for MySQL 8 integration. [#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco)) - -#### Security Fix -* Fix two vulnerabilities in codecs in decompression phase (a malicious user can fabricate compressed data that will lead to a buffer overflow in decompression). [#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2)) - - -### ClickHouse release 19.13.3.26, 2019-08-22 - -#### Bug Fix -* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) -* Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) -* Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) -* Fixed an issue with parsing CSV [#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) -* Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed possible data loss after `ALTER DELETE` query on table with skipping index. [#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0)) - -#### Security Fix -* If the attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, it can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper.
When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.13.2.19, 2019-08-14 - -#### New Feature -* Sampling profiler on query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). [#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) -* Allow to specify a list of columns with `COLUMNS('regexp')` expression that works like a more sophisticated variant of `*` asterisk. [#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `CREATE TABLE AS table_function()` is now possible. [#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000)) -* Adam optimizer for stochastic gradient descent is used by default in `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality almost without any tuning. [#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37)) -* Added functions for working with the custom week number [#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([Andy Yang](https://github.com/andyyzh)) -* `RENAME` queries now work with all storages. [#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7)) -* Now the client receives logs from the server with any desired level by setting `send_logs_level`, regardless of the log level specified in server settings. [#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) - -#### Backward Incompatible Change -* The setting `input_format_defaults_for_omitted_fields` is enabled by default. Inserts in Distributed tables need this setting to be the same on the cluster (you need to set it before a rolling update). It enables calculation of complex default expressions for omitted fields in `JSONEachRow` and `CSV*` formats. It should be the expected behavior but may lead to a negligible performance difference. [#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm)) - -#### Experimental Features -* New query processing pipeline. Use `experimental_use_processors=1` option to enable it. Use it at your own risk. [#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Bug Fix -* Kafka integration has been fixed in this version. -* Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values, improved `DoubleDelta` encoding for random data for `Int32`.
[#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk)) -* Fixed overestimation of `max_rows_to_read` if the setting `merge_tree_uniform_read_distribution` is set to 0. [#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvement -* Throws an exception if a `config.d` file doesn't have the same root element as the config file. [#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000)) - -#### Performance Improvement -* Optimize `count()`. Now it uses the smallest column (if possible). [#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird)) - -#### Build/Testing/Packaging Improvement -* Report memory usage in performance tests. [#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm)) -* Fix build with external `libcxx` [#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7)) -* Fix shared build with `rdkafka` library [#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7)) - -## ClickHouse release 19.11 - -### ClickHouse release 19.11.13.74, 2019-11-01 - -#### Bug Fix -* Fixed rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows). [#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin)) -* Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus json diagnostic messages. [#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz)) -* Fixed bug with `mrk` file extension for mutations ([alesapin](https://github.com/alesapin)) - -### ClickHouse release 19.11.12.69, 2019-10-02 - -#### Bug Fix -* Fixed performance degradation of index analysis on complex keys on large tables. This fixes [#6924](https://github.com/ClickHouse/ClickHouse/issues/6924). [#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Avoid rare SIGSEGV while sending data in tables with Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat)) -* Fix `Unknown identifier` with multiple joins. This fixes [#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2)) - -### ClickHouse release 19.11.11.57, 2019-09-13 -* Fix logical error causing segfaults when selecting from an empty Kafka topic. [#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7)) -* Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller)) - -### ClickHouse release 19.11.10.54, 2019-09-10 - -#### Bug Fix -* Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in the "one consumer - many partitions" scenario.
[#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7)) - -### ClickHouse release 19.11.9.52, 2019-09-06 -* Improve error handling in cache dictionaries. [#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fixed bug in function `arrayEnumerateUniqRanked`. [#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller)) -* Fix `JSONExtract` function while extracting a `Tuple` from JSON. [#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fixed possible data loss after `ALTER DELETE` query on table with skipping index. [#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0)) -* Fixed performance test. [#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Parquet: Fix reading boolean columns. [#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed wrong behaviour of `nullIf` function for constant arguments. [#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix Kafka messages duplication problem on normal server restart. [#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7)) -* Fixed an issue when long `ALTER UPDATE` or `ALTER DELETE` may prevent regular merges from running. Prevent mutations from executing if there are not enough free threads available. [#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix)) -* Fixed error with processing "timezone" in server configuration file. [#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix Kafka tests. [#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7)) - -#### Security Fix -* If the attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, it can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.11.8.46, 2019-08-22 - -#### Bug Fix -* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) -* Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) -* Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION.
[#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) -* Fixed an issue with parsing CSV [#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) -* Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.11.7.40, 2019-08-14 - -#### Bug Fix -* Kafka integration has been fixed in this version. -* Fix segfault when using `arrayReduce` for constant arguments. [#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed `toFloat()` monotonicity. [#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000)) -* Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ)) -* Fixed logic of `arrayEnumerateUniqRanked` function. [#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Removed extra verbose logging from MySQL handler. [#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregate functions. [#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ)) -* Do not expose virtual columns in `system.columns` table. This is required for backward compatibility. [#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix bug with memory allocation for string fields in complex key cache dictionary. [#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin)) -* Fix bug with enabling adaptive granularity when creating new replica for `Replicated*MergeTree` table. [#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin)) -* Fix infinite loop when reading Kafka messages. [#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7)) -* Fixed the possibility of a fabricated query to cause a server crash due to stack overflow in SQL parser and possibility of stack overflow in `Merge` and `Distributed` tables. [#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed Gorilla encoding error on small sequences.
[#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk)) - -#### Improvement -* Allow user to override `poll_interval` and `idle_connection_timeout` settings on connection. [#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.11.5.28, 2019-08-05 - -#### Bug fix -* Fixed the possibility of hanging queries when server is overloaded. [#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix FPE in yandexConsistentHash function. This fixes [#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). [#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix parsing of `bool` settings from `true` and `false` strings in configuration files. [#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin)) -* Fix rare bug with incompatible stream headers in queries to `Distributed` table over `MergeTree` table when part of `WHERE` moves to `PREWHERE`. [#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin)) -* Fixed overflow in integer division of signed type to unsigned type. This fixes [#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Backward Incompatible Change -* `Kafka` still broken. - -### ClickHouse release 19.11.4.24, 2019-08-01 - -#### Bug Fix -* Fix bug with writing secondary indices marks with adaptive granularity. [#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin)) -* Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ)) -* Fixed hang in `JSONExtractRaw` function. Fixed [#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix segfault in ExternalLoader::reloadOutdated(). [#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fixed the case when server may close listening sockets but not shutdown and continue serving remaining queries. You may end up with two running clickhouse-server processes. Sometimes, the server may return an error `bad_function_call` for remaining queries. [#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed useless and incorrect condition on update field for initial loading of external dictionaries via ODBC, MySQL, ClickHouse and HTTP. This fixes [#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to not-Nullable column in case if it doesn't contain Nulls (e.g. 
in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix non-deterministic result of "uniq" aggregate function in extremely rare cases. The bug was present in all ClickHouse versions. [#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed segfault when setting a slightly too high CIDR in the function `IPv6CIDRToRange`. [#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Fixed small memory leak when the server throws many exceptions from many different contexts. [#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix the situation when consumer got paused before subscription and was not resumed afterwards. [#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version. -* Clearing the Kafka data buffer from the previous read operation that was completed with an error [#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) Note that Kafka is broken in this version. -* Since `StorageMergeTree::background_task_handle` is initialized in `startup()` the `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7)) - -#### Build/Testing/Packaging Improvement -* Added official `rpm` packages. [#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([alesapin](https://github.com/alesapin)) -* Add an ability to build `.rpm` and `.tgz` packages with `packager` script. [#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([alesapin](https://github.com/alesapin)) -* Fixes for "Arcadia" build system. [#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller)) - -#### Backward Incompatible Change -* `Kafka` is broken in this version. - - -### ClickHouse release 19.11.3.11, 2019-07-18 - -#### New Feature -* Added support for prepared statements. [#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Alexander](https://github.com/sanych73)) [#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `DoubleDelta` and `Gorilla` column codecs [#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Vasily Nemkov](https://github.com/Enmk)) -* Added `os_thread_priority` setting that allows to control the "nice" value of query processing threads that is used by OS to adjust dynamic scheduling priority. It requires `CAP_SYS_NICE` capabilities to work. This implements [#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Implement `_topic`, `_offset`, `_key` columns for Kafka engine [#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
-* Add aggregate function combinator `-Resample` [#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz)) -* Aggregate functions `groupArrayMovingSum(win_size)(x)` and `groupArrayMovingAvg(win_size)(x)`, which calculate moving sum/avg with or without window-size limitation. [#5595](https://github.com/ClickHouse/ClickHouse/pull/5595) ([inv2004](https://github.com/inv2004)) -* Add synonym `arrayFlatten` <-> `flatten` [#5764](https://github.com/ClickHouse/ClickHouse/pull/5764) ([hcz](https://github.com/hczhcz)) -* Integrate H3 function `geoToH3` from Uber. [#4724](https://github.com/ClickHouse/ClickHouse/pull/4724) ([Remen Ivan](https://github.com/BHYCHIK)) [#5805](https://github.com/ClickHouse/ClickHouse/pull/5805) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Bug Fix -* Implement DNS cache with asynchronous update. A separate thread resolves all hosts and updates the DNS cache periodically (setting `dns_cache_update_period`). It should help when the IP of hosts changes frequently. [#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ)) -* Fix segfault in `Delta` codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin)) -* Fix segfault in TTL merge with non-physical columns in block. [#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ)) -* Fix rare bug in checking of part with `LowCardinality` column. Previously `checkDataPart` always failed for parts with a `LowCardinality` column. [#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin)) -* Avoid hanging connections when server thread pool is full. It is important for connections from `remote` table function or connections to a shard without replicas when there is a long connection timeout. This fixes [#5878](https://github.com/ClickHouse/ClickHouse/issues/5878) [#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Support for constant arguments to `evalMLModel` function. This fixes [#5817](https://github.com/ClickHouse/ClickHouse/issues/5817) [#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed the issue when ClickHouse determines default time zone as `UCT` instead of `UTC`. This fixes [#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed buffer underflow in `visitParamExtractRaw`. This fixes [#5901](https://github.com/ClickHouse/ClickHouse/issues/5901) [#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on the leader replica. [#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin)) -* Fix `coalesce` for `ColumnConst` with `ColumnNullable` + related changes.
[#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix the `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before [#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7)) -* Fix `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in the right table. [#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuikov](https://github.com/4ertus2)) -* Possible fix of infinite sleeping of low-priority queries. [#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix race condition which may cause some queries not to appear in query_log after `SYSTEM FLUSH LOGS` query. [#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ)) -* Fixed `heap-use-after-free` ASan warning in ClusterCopier caused by a watch which tries to use an already removed copier object. [#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fixed wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit-tests. [#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Prevent source and intermediate array join columns from masking same-name columns. [#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix insert and select query to MySQL engine with MySQL-style identifier quoting. [#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014)) -* Now `CHECK TABLE` query can work with MergeTree engine family. It returns check status and message if any for each part (or file in case of simpler engines). Also, fix bug in fetch of a broken part. [#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin)) -* Fix SPLIT_SHARED_LIBRARIES runtime [#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1)) -* Fixed time zone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow` [#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* clickhouse-copier: Fix use-after-free on shutdown [#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller)) -* Updated `simdjson`. Fixed the issue that some invalid JSONs with zero bytes parse successfully. [#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix shutdown of SystemLogs [#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ)) -* Fix hanging when condition in invalidate_query depends on a dictionary. [#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar)) - -#### Improvement
-* Allow unresolvable addresses in cluster configuration. They will be considered unavailable, and resolution will be retried at every connection attempt. This is especially useful for Kubernetes.
This fixes [#5714](https://github.com/ClickHouse/ClickHouse/issues/5714) [#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Close idle TCP connections (with a one-hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server can possibly keep a connection pool to every other server, and after peak query concurrency, connections will stall. This fixes [#5879](https://github.com/ClickHouse/ClickHouse/issues/5879) [#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Better quality of `topK` function. Changed the SpaceSaving set behavior to remove the last element if the new element has a bigger weight. [#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE)) -* URL functions that work with domains now can work for incomplete URLs without a scheme [#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin)) -* Checksums added to the `system.parts_columns` table. [#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) -* Added `Enum` data type as a synonym for `Enum8` or `Enum16`. [#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000)) -* Full bit transpose variant for `T64` codec. Could lead to better compression with `zstd`. [#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuikov](https://github.com/4ertus2)) -* Condition on `startsWith` function can now use the primary key. This fixes [#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [#5882](https://github.com/ClickHouse/ClickHouse/issues/5882) [#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000)) -* Allow to use `clickhouse-copier` with cross-replication cluster topology by permitting an empty database name. [#5745](https://github.com/ClickHouse/ClickHouse/pull/5745) ([nvartolomei](https://github.com/nvartolomei)) -* Use `UTC` as default timezone on a system without `tzdata` (e.g. a bare Docker container). Before this patch, error message `Could not determine local time zone` was printed and server or client refused to start. [#5827](https://github.com/ClickHouse/ClickHouse/pull/5827) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Brought back support for floating point argument in function `quantileTiming` for backward compatibility. [#5911](https://github.com/ClickHouse/ClickHouse/pull/5911) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Show which table is missing a column in error messages. [#5768](https://github.com/ClickHouse/ClickHouse/pull/5768) ([Ivan](https://github.com/abyss7)) -* Disallow running queries with the same query_id by various users [#5430](https://github.com/ClickHouse/ClickHouse/pull/5430) ([proller](https://github.com/proller)) -* More robust code for sending metrics to Graphite. It will work even during long `RENAME TABLE` operations. [#5875](https://github.com/ClickHouse/ClickHouse/pull/5875) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* More informative error messages will be displayed when ThreadPool cannot schedule a task for execution.
This fixes [#5305](https://github.com/ClickHouse/ClickHouse/issues/5305) [#5801](https://github.com/ClickHouse/ClickHouse/pull/5801) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Inverting ngramSearch to be more intuitive [#5807](https://github.com/ClickHouse/ClickHouse/pull/5807) ([Danila Kutenin](https://github.com/danlark1)) -* Add user parsing in HDFS engine builder [#5946](https://github.com/ClickHouse/ClickHouse/pull/5946) ([akonyaev90](https://github.com/akonyaev90)) -* Update default value of `max_ast_elements parameter` [#5933](https://github.com/ClickHouse/ClickHouse/pull/5933) ([Artem Konovalov](https://github.com/izebit)) -* Added a notion of obsolete settings. The obsolete setting `allow_experimental_low_cardinality_type` can be used with no effect. [0f15c01c6802f7ce1a1494c12c846be8c98944cd](https://github.com/ClickHouse/ClickHouse/commit/0f15c01c6802f7ce1a1494c12c846be8c98944cd) [Alexey Milovidov](https://github.com/alexey-milovidov) - -#### Performance Improvement -* Increase number of streams to SELECT from Merge table for more uniform distribution of threads. Added setting `max_streams_multiplier_for_merge_tables`. This fixes [#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvement -* Add a backward compatibility test for client-server interaction with different versions of clickhouse. [#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin)) -* Test coverage information in every commit and pull request. [#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin)) -* Cooperate with address sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of "use-after-free" errors. [#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm)) -* Switch to [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for stack traces printing [#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab)) -* Add two more warnings from -Weverything [#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Allow to build ClickHouse with Memory Sanitizer. [#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed ubsan report about `bitTest` function in fuzz test. [#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Docker: added possibility to init a ClickHouse instance which requires authentication. [#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun)) -* Update librdkafka to version 1.1.0 [#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7)) -* Add global timeout for integration tests and disable some of them in tests code. [#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin)) -* Fix some ThreadSanitizer failures. [#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm)) -* The `--no-undefined` option forces the linker to check all external names for existence while linking. 
It's very useful to track real dependencies between libraries in the split build mode. [#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7)) -* Added performance test for [#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed compatibility with gcc-7. [#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added support for gcc-9. This fixes [#5717](https://github.com/ClickHouse/ClickHouse/issues/5717) [#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed error when libunwind can be linked incorrectly. [#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a few warnings found by PVS-Studio. [#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added initial support for `clang-tidy` static analyzer. [#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Convert BSD/Linux endian macros( 'be64toh' and 'htobe64') to the Mac OS X equivalents [#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([Fu Chen](https://github.com/fredchenbj)) -* Improved integration tests guide. [#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Fixing build at macosx + gcc9 [#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov)) -* Fix a hard-to-spot typo: aggreAGte -> aggregate. [#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm)) -* Fix freebsd build [#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller)) -* Add link to experimental YouTube channel to website [#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov)) -* CMake: add option for coverage flags: WITH_COVERAGE [#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller)) -* Fix initial size of some inline PODArray's. [#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm)) -* clickhouse-server.postinst: fix os detection for centos 6 [#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller)) -* Added Arch linux package generation. 
[#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon)) -* Split Common/config.h by libs (dbms) [#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller)) -* Fixes for "Arcadia" build platform [#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller)) -* Fixes for unconventional build (gcc9, no submodules) [#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller)) -* Require explicit type in unalignedStore because it was proven to be bug-prone [#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm)) -* Fixes MacOS build [#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov)) -* Performance test concerning the new JIT feature with bigger dataset, as requested here [#5263](https://github.com/ClickHouse/ClickHouse/issues/5263) [#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Run stateful tests in stress test [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin)) - -#### Backward Incompatible Change -* `Kafka` is broken in this version. -* Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, downgrade to versions prior to 19.6 will be impossible. [#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin)) -* Removed obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while. [#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov)) - - -## ClickHouse release 19.10 -### ClickHouse release 19.10.1.5, 2019-07-12 - -#### New Feature -* Add new column codec: `T64`. Made for (U)IntX/EnumX/Data(Time)/DecimalX columns. It should be good for columns with constant or small range values. Codec itself allows enlarge or shrink data type without re-compression. [#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuikov](https://github.com/4ertus2)) -* Add database engine `MySQL` that allow to view all the tables in remote MySQL server [#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014)) -* `bitmapContains` implementation. It's 2x faster than `bitmapHasAny` if the second bitmap contains one element. [#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang)) -* Support for `crc32` function (with behaviour exactly as in MySQL or PHP). Do not use it if you need a hash function. [#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Remen Ivan](https://github.com/BHYCHIK)) -* Implemented `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. 
[#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014)) - -#### Bug Fix -* Ignore query execution limits and max parts size for merge limits while executing mutations. [#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ)) -* Fix bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin)) -* Fix the function `arrayEnumerateUniqRanked` for arguments with empty arrays [#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller)) -* Don't subscribe to Kafka topics without intent to poll any messages. [#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7)) -* Make the setting `join_use_nulls` have no effect for types that cannot be inside Nullable [#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia)) -* Fixed `Incorrect size of index granularity` errors [#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster)) -* Fix Float to Decimal conversion overflow [#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster)) -* Flush the buffer when `WriteBufferFromHDFS`'s destructor is called. This fixes writing into `HDFS`. [#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([Xindong Peng](https://github.com/eejoin)) - -#### Improvement -* Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm)) -* Non-blocking loading of external dictionaries. [#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar)) -* Network timeouts can be dynamically changed for already established connections according to the settings. [#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok)) -* Using "public_suffix_list" for functions `firstSignificantSubdomain`, `cutToFirstSignificantSubdomain`. It's using a perfect hash table generated by `gperf` with a list generated from the file: [https://publicsuffix.org/list/public_suffix_list.dat](https://publicsuffix.org/list/public_suffix_list.dat). (for example, now we recognize the domain `ac.uk` as non-significant). [#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Adopted `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log` [#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Using sessions for connections with MySQL compatibility protocol. #5476 [#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuriy Baranov](https://github.com/yurriy)) -* Support more `ALTER` queries `ON CLUSTER`. [#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundyli](https://github.com/sundy-li)) -* Support `` section in `clickhouse-local` config file.
[#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller)) -* Allow run query with `remote` table function in `clickhouse-local` [#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller)) - -#### Performance Improvement -* Add the possibility to write the final mark at the end of MergeTree columns. It allows to avoid useless reads for keys that are out of table data range. It is enabled only if adaptive index granularity is in use. [#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin)) -* Improved performance of MergeTree tables on very slow filesystems by reducing number of `stat` syscalls. [#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes #5631. [#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvement -* Implemented `TestKeeper` as an implementation of ZooKeeper interface used for testing [#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([levushkin aleksej](https://github.com/alexey-milovidov)) -* From now on `.sql` tests can be run isolated by server, in parallel, with random database. It allows to run them faster, add new tests with custom server configurations, and be sure that different tests doesn't affect each other. [#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7)) -* Remove `` and `` from performance tests [#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia)) -* Fixed "select_format" performance test for `Pretty` formats [#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov)) - - -## ClickHouse release 19.9 -### ClickHouse release 19.9.3.31, 2019-07-05 - -#### Bug Fix -* Fix segfault in Delta codec which affects columns with values less than 32 bits size. The bug led to random memory corruption. [#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin)) -* Fix rare bug in checking of part with LowCardinality column. [#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin)) -* Fix segfault in TTL merge with non-physical columns in block. [#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ)) -* Fix potential infinite sleeping of low-priority queries. [#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix how ClickHouse determines default time zone as UCT instead of UTC. [#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix bug about executing distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries on follower replica before leader replica. Now they will be executed directly on leader replica. [#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin)) -* Fix race condition, which cause that some queries may not appear in query_log instantly after SYSTEM FLUSH LOGS query. 
[#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ)) -* Added missing support for constant arguments to `evalMLModel` function. [#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.9.2.4, 2019-06-24 - -#### New Feature -* Print information about frozen parts in `system.parts` table. [#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller)) -* Ask client password on clickhouse-client start on tty if not set in arguments [#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller)) -* Implement `dictGet` and `dictGetOrDefault` functions for Decimal types. [#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Improvement -* Debian init: Add service stop timeout [#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller)) -* Add setting forbidden by default to create table with suspicious types for LowCardinality [#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia)) -* Regression functions return model weights when not used as State in function `evalMLMethod`. [#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37)) -* Rename and improve regression methods. [#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37)) -* Clearer interfaces of string searchers. [#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1)) - -#### Bug Fix -* Fix potential data loss in Kafka [#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7)) -* Fix potential infinite loop in `PrettySpace` format when called with zero columns [#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia)) -* Fixed UInt32 overflow bug in linear models. Allow eval ML model for non-const model argument. [#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if provided index does not exist [#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn)) -* Fix segfault with `bitmapHasAny` in scalar subquery [#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang)) -* Fixed error when replication connection pool doesn't retry to resolve host, even when DNS cache was dropped. [#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin)) -* Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ)) -* Fix INSERT into Distributed table with MATERIALIZED column [#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat)) -* Fix bad alloc when truncate Join storage [#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason)) -* In recent versions of package tzdata some of files are symlinks now. The current mechanism for detecting default timezone gets broken and gives wrong names for some timezones. 
Now at least we force the timezone name to the contents of TZ if provided. [#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7)) -* Fix some extremely rare cases with MultiVolnitsky searcher when the constant needles in sum are at least 16KB long. The algorithm missed or overwrote the previous results which can lead to the incorrect result of `multiSearchAny`. [#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1)) -* Fix the issue when settings for ExternalData requests couldn't use ClickHouse settings. Also, for now, settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data it can be interpreted as table format and in the query it can be a setting). [#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1)) -* Fix bug when parts were removed only from FS without dropping them from Zookeeper. [#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin)) -* Remove debug logging from MySQL protocol [#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Skip ZNONODE during DDL query processing [#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat)) -* Fix mix `UNION ALL` result column type. There were cases with inconsistent data and column types of resulting columns. [#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2)) -* Throw an exception on wrong integers in `dictGetT` functions instead of crash. [#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix wrong element_count and load_factor for hashed dictionary in `system.dictionaries` table. [#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat)) - -#### Build/Testing/Packaging Improvement -* Fixed build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin)) -* Include roaring.h as roaring/roaring.h [#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej)) -* Fix gcc9 warnings in hyperscan (#line directive is evil!) [#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1)) -* Fix all warnings when compiling with gcc-9. Fix some contrib issues. Fix gcc9 ICE and submit it to bugzilla. 
[#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1)) -* Fixed linking with lld [#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Remove unused specializations in dictionaries [#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuikov](https://github.com/4ertus2)) -* Improvement performance tests for formatting and parsing tables for different types of files [#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia)) -* Fixes for parallel test run [#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller)) -* Docker: use configs from clickhouse-test [#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller)) -* Fix compile for FreeBSD [#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller)) -* Upgrade boost to 1.70 [#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller)) -* Fix build clickhouse as submodule [#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller)) -* Improve JSONExtract performance tests [#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar)) - -## ClickHouse release 19.8 -### ClickHouse release 19.8.3.8, 2019-06-11 - -#### New Features -* Added functions to work with JSON [#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz)) [#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar)) -* Add a function basename, with a similar behaviour to a basename function, which exists in a lot of languages (`os.path.basename` in python, `basename` in PHP, etc...). Work with both an UNIX-like path or a Windows path. [#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set offset of n for LIMIT BY clause. [#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ)) -* Added new data type `SimpleAggregateFunction`, which allows to have columns with light aggregation in an `AggregatingMergeTree`. This can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea)) -* Added support for non-constant arguments in function `ngramDistance` [#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1)) -* Added functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute for sequence skewness, sample skewness, kurtosis and sample kurtosis respectively. [#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz)) -* Support rename operation for `MaterializeView` storage. [#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Added server which allows connecting to ClickHouse using MySQL client. [#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuriy Baranov](https://github.com/yurriy)) -* Add `toDecimal*OrZero` and `toDecimal*OrNull` functions. 
[#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuikov](https://github.com/4ertus2)) -* Support Decimal types in functions: `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, `medianExactWeighted`. [#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2)) -* Added `toValidUTF8` function, which replaces all invalid UTF-8 characters with the replacement character � (U+FFFD). [#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1)) -* Added `format` function. It formats a constant pattern (a simplified Python format pattern) with the strings listed in the arguments. [#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1)) -* Added `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm)) -* Added `ngramSearch` function to calculate the non-symmetric difference between needle and haystack. [#5418](https://github.com/ClickHouse/ClickHouse/pull/5418) [#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1)) -* Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. Has different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Also supports mini-batches of custom size. [#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37)) -* Implementation of `geohashEncode` and `geohashDecode` functions. [#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk)) -* Added aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It uses linear interpolation between two sample timestamps and then sums the time series together. Added aggregate function `timeSeriesGroupRateSum`, which calculates the rate of time series and then sums the rates together. [#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([Yangkuan Liu](https://github.com/LiuYangkuan)) -* Added functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and upper bounds for an IP in the subnet using a CIDR. [#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE)) -* Add an X-ClickHouse-Summary header when sending a query over HTTP with the setting `send_progress_in_http_headers` enabled. It returns the usual information of X-ClickHouse-Progress, with additional information like how many rows and bytes were inserted by the query. [#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE)) - -#### Improvements -* Added `max_parts_in_total` setting for MergeTree family of tables (default: 100 000) that prevents unsafe specification of partition key #5166. [#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `clickhouse-obfuscator`: derive the seed for individual columns by combining the initial seed with the column name, not the column position. This is intended to transform datasets with multiple related tables, so that tables will remain JOINable after transformation.
[#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed functions `jsonExtract` to `JSONExtract`. When something goes wrong these functions return the correspondent values, not `NULL`. Modified function `JSONExtract`, now it gets the return type from its last parameter and doesn't inject nullables. Implemented fallback to RapidJSON in case AVX2 instructions are not available. Simdjson library updated to a new version. [#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar)) -* Now `if` and `multiIf` functions don't rely on the condition's `Nullable`, but rely on the branches for sql compatibility. [#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus)) -* `In` predicate now generates `Null` result from `Null` input like the `Equal` function. [#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus)) -* Check the time limit every (flush_interval / poll_timeout) number of rows from Kafka. This allows to break the reading from Kafka consumer more frequently and to check the time limits for the top-level streams [#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7)) -* Link rdkafka with bundled SASL. It should allow to use SASL SCRAM authentication [#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7)) -* Batched version of RowRefList for ALL JOINS. [#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuikov](https://github.com/4ertus2)) -* clickhouse-server: more informative listen error messages. [#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller)) -* Support dictionaries in clickhouse-copier for functions in `` [#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller)) -* Add new setting `kafka_commit_every_batch` to regulate Kafka committing policy. -It allows to set commit mode: after every batch of messages is handled, or after the whole block is written to the storage. It's a trade-off between losing some messages or reading them twice in some extreme situations. [#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7)) -* Make `windowFunnel` support other Unsigned Integer Types. [#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundyli](https://github.com/sundy-li)) -* Allow to shadow virtual column `_table` in Merge engine. [#5325](https://github.com/ClickHouse/ClickHouse/pull/5325) ([Ivan](https://github.com/abyss7)) -* Make `sequenceMatch` aggregate functions support other unsigned Integer types [#5339](https://github.com/ClickHouse/ClickHouse/pull/5339) ([sundyli](https://github.com/sundy-li)) -* Better error messages if checksum mismatch is most likely caused by hardware failures. [#5355](https://github.com/ClickHouse/ClickHouse/pull/5355) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Check that underlying tables support sampling for `StorageMerge` [#5366](https://github.com/ClickHouse/ClickHouse/pull/5366) ([Ivan](https://github.com/abyss7)) -* Сlose MySQL connections after their usage in external dictionaries. It is related to issue #893. 
[#5395](https://github.com/ClickHouse/ClickHouse/pull/5395) ([Clément Rodriguez](https://github.com/clemrodriguez)) -* Improvements of MySQL Wire Protocol. Changed name of format to MySQLWire. Using RAII for calling RSA_free. Disabling SSL if context cannot be created. [#5419](https://github.com/ClickHouse/ClickHouse/pull/5419) ([Yuriy Baranov](https://github.com/yurriy)) -* clickhouse-client: allow running with an inaccessible history file (read-only, no disk space, file is a directory, ...). [#5431](https://github.com/ClickHouse/ClickHouse/pull/5431) ([proller](https://github.com/proller)) -* Respect query settings in asynchronous INSERTs into Distributed tables. [#4936](https://github.com/ClickHouse/ClickHouse/pull/4936) ([TCeason](https://github.com/TCeason)) -* Renamed functions `leastSqr` to `simpleLinearRegression`, `LinearRegression` to `linearRegression`, `LogisticRegression` to `logisticRegression`. [#5391](https://github.com/ClickHouse/ClickHouse/pull/5391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Performance Improvements -* Parallelize processing of parts of non-replicated MergeTree tables in ALTER MODIFY query. [#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush)) -* Optimizations in regular expressions extraction. [#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1)) -* Do not add the right join key column to the join result if it's used only in the JOIN ON section. [#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2)) -* Freeze the Kafka buffer after the first empty response. It avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7)) -* `concat` function optimization for multiple arguments. [#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1)) -* Query optimisation. Allow pushing down IN statements while rewriting comma/cross joins into inner ones. [#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2)) -* Upgrade our LZ4 implementation with the reference one to have faster decompression. [#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1)) -* Implemented MSD radix sort (based on kxsort), and partial sorting. [#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty)) - -#### Bug Fixes -* Fix push require columns with join [#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014)) -* Fixed a bug where, when ClickHouse is run by systemd, the command `sudo service clickhouse-server forcerestart` was not working as expected. [#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller)) -* Fix http error codes in DataPartsExchange (the interserver http server on port 9009 always returned code 200, even on errors). [#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller)) -* Fix SimpleAggregateFunction for String longer than MAX_SMALL_STRING_SIZE [#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat)) -* Fix error for `Decimal` to `Nullable(Decimal)` conversion in IN.
Support other Decimal to Decimal conversions (including different scales). [#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed FPU clobbering in simdjson library that lead to wrong calculation of `uniqHLL` and `uniqCombined` aggregate function and math functions such as `log`. [#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed handling mixed const/nonconst cases in JSON functions. [#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix `retention` function. Now all conditions that satisfy in a row of data are added to the data state. [#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) -* Fix result type for `quantileExact` with Decimals. [#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Documentation -* Translate documentation for `CollapsingMergeTree` to chinese. [#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX)) -* Translate some documentation about table engines to chinese. - [#5134](https://github.com/ClickHouse/ClickHouse/pull/5134) - [#5328](https://github.com/ClickHouse/ClickHouse/pull/5328) - ([never lee](https://github.com/neverlee)) - - -#### Build/Testing/Packaging Improvements -* Fix some sanitizer reports that show probable use-after-free.[#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7)) -* Move performance tests out of separate directories for convenience. [#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix incorrect performance tests. [#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin)) -* Added a tool to calculate checksums caused by bit flips to debug hardware issues. [#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Make runner script more usable. [#5340](https://github.com/ClickHouse/ClickHouse/pull/5340)[#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov)) -* Add small instruction how to write performance tests. [#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin)) -* Add ability to make substitutions in create, fill and drop query in performance tests [#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia)) - -## ClickHouse release 19.7 - -### ClickHouse release 19.7.5.29, 2019-07-05 - -#### Bug Fix -* Fix performance regression in some queries with JOIN. [#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014)) - -### ClickHouse release 19.7.5.27, 2019-06-09 - -#### New features -* Added bitmap related functions `bitmapHasAny` and `bitmapHasAll` analogous to `hasAny` and `hasAll` functions for arrays. [#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin)) - -#### Bug Fixes -* Fix segfault on `minmax` INDEX with Null value. 
[#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0)) -* Mark all input columns in LIMIT BY as required output. It fixes 'Not found column' error in some distributed queries. [#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap)) -* Fix "Column '0' already exists" error in `SELECT .. PREWHERE` on column with DEFAULT [#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller)) -* Fix `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ)) -* Don't crash the server when Kafka consumers have failed to start. [#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7)) -* Fixed bitmap functions producing wrong results. [#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh)) -* Fix element_count for hashed dictionary (do not include duplicates) [#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat)) -* Use contents of environment variable TZ as the name for timezone. It helps to correctly detect the default timezone in some cases. [#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7)) -* Do not try to convert integers in `dictGetT` functions, because it doesn't work correctly. Throw an exception instead. [#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix settings in ExternalData HTTP request. [#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1)) -* Fix bug when parts were removed only from FS without dropping them from Zookeeper. [#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin)) -* Fix segmentation fault in `bitmapHasAny` function. [#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang)) -* Fixed error when the replication connection pool doesn't retry to resolve host, even when the DNS cache was dropped. [#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin)) -* Fixed `DROP INDEX IF EXISTS` query. Now `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn't raise an exception if the provided index does not exist. [#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn)) -* Fix union all supertype column. There were cases with inconsistent data and column types of resulting columns. [#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2)) -* Skip ZNONODE during DDL query processing. Before, if another node removed the znode in the task queue, the one that did not process it, but already got the list of children, would terminate the DDLWorker thread. [#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat)) -* Fix INSERT into Distributed() table with MATERIALIZED column. [#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat)) - -### ClickHouse release 19.7.3.9, 2019-05-30 - -#### New Features -* Allow limiting the range of a setting that can be specified by the user. These constraints can be set up in the user settings profile.
[#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar)) -* Add a second version of the function `groupUniqArray` with an optional `max_size` parameter that limits the size of the resulting array. This behavior is similar to `groupArray(max_size)(x)` function. [#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE)) -* For TSVWithNames/CSVWithNames input file formats, column order can now be determined from file header. This is controlled by `input_format_with_names_use_header` parameter. [#5081](https://github.com/ClickHouse/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz)) - -#### Bug Fixes -* Crash with uncompressed_cache + JOIN during merge (#5197) [#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1)) -* Segmentation fault on a clickhouse-client query to system tables. #5066 [#5127](https://github.com/ClickHouse/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7)) -* Data loss on heavy load via KafkaEngine (#4736) [#5080](https://github.com/ClickHouse/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7)) -* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Performance Improvements -* Use radix sort for sorting by single numeric column in `ORDER BY` without `LIMIT`. [#5106](https://github.com/ClickHouse/ClickHouse/pull/5106), [#4439](https://github.com/ClickHouse/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Documentation -* Translate documentation for some table engines to Chinese. [#5107](https://github.com/ClickHouse/ClickHouse/pull/5107), [#5094](https://github.com/ClickHouse/ClickHouse/pull/5094), [#5087](https://github.com/ClickHouse/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee)) - -#### Build/Testing/Packaging Improvements -* Print UTF-8 characters properly in `clickhouse-test`. [#5084](https://github.com/ClickHouse/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add command line parameter for clickhouse-client to always load suggestion data. [#5102](https://github.com/ClickHouse/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Resolve some of PVS-Studio warnings. [#5082](https://github.com/ClickHouse/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Update LZ4 [#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1)) -* Add gperf to build requirements for upcoming pull request #5030. [#5110](https://github.com/ClickHouse/ClickHouse/pull/5110) ([proller](https://github.com/proller)) - -## ClickHouse release 19.6 -### ClickHouse release 19.6.3.18, 2019-06-13 - -#### Bug Fixes -* Fixed IN condition pushdown for queries from table functions `mysql` and `odbc` and corresponding table engines. This fixes #3540 and #2384.
[#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix deadlock in Zookeeper. [#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc)) -* Allow quoted decimals in CSV. [#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2) -* Disallow conversion from float Inf/NaN into Decimals (throw exception). [#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix data race in rename query. [#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014)) -* Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP_FAILED in allocating UncompressedCache and in a result to crashes of queries at high loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280)([Danila Kutenin](https://github.com/danlark1)) - -### ClickHouse release 19.6.2.11, 2019-05-13 - -#### New Features -* TTL expressions for columns and tables. [#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ)) -* Added support for `brotli` compression for HTTP responses (Accept-Encoding: br) [#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin)) -* Added new function `isValidUTF8` for checking whether a set of bytes is correctly utf-8 encoded. [#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1)) -* Add new load balancing policy `first_or_random` which sends queries to the first specified host and if it's inaccessible send queries to random hosts of shard. Useful for cross-replication topology setups. [#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei)) - -#### Experimental Features -* Add setting `index_granularity_bytes` (adaptive index granularity) for MergeTree* tables family. [#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin)) - -#### Improvements -* Added support for non-constant and negative size and length arguments for function `substringUTF8`. [#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Disable push-down to right table in left join, left table in right join, and both tables in full join. This fixes wrong JOIN results in some cases. [#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7)) -* `clickhouse-copier`: auto upload task configuration from `--task-file` option [#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller)) -* Added typos handler for storage factory and table functions factory. [#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1)) -* Support asterisks and qualified asterisks for multiple joins without subqueries [#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2)) -* Make missing column error message more user friendly. 
[#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Performance Improvements -* Significant speedup of ASOF JOIN [#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy)) - -#### Backward Incompatible Changes -* HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin)) - -#### Bug Fixes -* Fixed potential null pointer dereference in `clickhouse-copier`. [#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller)) -* Fixed error on query with JOIN + ARRAY JOIN [#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar)) -* Partially fix distributed_product_mode = local. It's possible to allow columns of local tables in where/having/order by/... via table aliases. Throw exception if table does not have alias. There's not possible to access to the columns without table aliases yet. [#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix potentially wrong result for `SELECT DISTINCT` with `JOIN` [#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvements -* Fixed test failures when running clickhouse-server on different host [#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk)) -* clickhouse-test: Disable color control sequences in non tty environment. [#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin)) -* clickhouse-test: Allow use any test database (remove `test.` qualification where it possible) [#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller)) -* Fix ubsan errors [#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar)) -* Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliable [#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1)) -* Python util to help with backports and changelogs. 
[#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7)) - - -## ClickHouse release 19.5 -### ClickHouse release 19.5.4.22, 2019-05-13 - -#### Bug fixes -* Fixed possible crash in bitmap* functions [#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh)) -* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if LowCardinality column was the part of primary key. #5031 [#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Modification of retention function: If a row satisfies both the first and NTH condition, only the first satisfied condition is added to the data state. Now all conditions that satisfy in a row of data are added to the data state. [#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) - - -### ClickHouse release 19.5.3.8, 2019-04-18 - -#### Bug fixes -* Fixed type of setting `max_partitions_per_insert_block` from boolean to UInt64. [#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat)) - - -### ClickHouse release 19.5.2.6, 2019-04-15 - -#### New Features - -* [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`). [#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1)) -* `multiSearchFirstPosition` function was added. [#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1)) -* Implement the predefined expression filter per row for tables. [#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7)) -* A new type of data skipping indices based on bloom filters (can be used for `equal`, `in` and `like` functions). [#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0)) -* Added `ASOF JOIN` which allows to run queries that join to the most recent value known. [#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2)) -* Rewrite multiple `COMMA JOIN` to `CROSS JOIN`. Then rewrite them to `INNER JOIN` if possible. [#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Improvement - -* `topK` and `topKWeighted` now supports custom `loadFactor` (fixes issue [#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). 
[#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin)) -* Allow to use `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist)) -* Support for `CREATE OR REPLACE VIEW`. Allow to create a view or set a new definition in a single statement. [#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea)) -* `Buffer` table engine now supports `PREWHERE`. [#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([Yangkuan Liu](https://github.com/LiuYangkuan)) -* Add ability to start a replicated table without metadata in ZooKeeper in `readonly` mode. [#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin)) -* Fixed flicker of progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Allow to disable functions with `hyperscan` library on a per-user basis to limit potentially excessive and uncontrolled resource usage. [#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add version number logging in all errors. [#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller)) -* Added restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added the number of arguments limit to the `multiSearch` functions. [#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1)) -* Improved usage of scratch space and error handling in Hyperscan. [#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1)) -* Fill `system.graphite_detentions` from a table config of `*GraphiteMergeTree` engine tables. [#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid)) -* Rename `trigramDistance` function to `ngramDistance` and add more functions with `CaseInsensitive` and `UTF`. [#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1)) -* Improved data skipping indices calculation. [#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0)) -* Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn)) - -#### Bug Fix - -* Avoid `std::terminate` in case of memory allocation failure. Now `std::bad_alloc` exception is thrown as expected. [#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixes capnproto reading from buffer. Sometimes files weren't loaded successfully by HTTP. [#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs)) -* Fix error `Unknown log entry type: 0` after `OPTIMIZE TABLE FINAL` query. 
[#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird)) -* Wrong arguments to `hasAny` or `hasAll` functions may lead to segfault. [#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Deadlock may happen while executing `DROP DATABASE dictionary` query. [#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix undefined behavior in `median` and `quantile` functions. [#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz)) -* Fix compression level detection when `network_compression_method` in lowercase. Broken in v19.1. [#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller)) -* Fixed ignorance of `UTC` setting (fixes issue [#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller)) -* Fix `histogram` function behaviour with `Distributed` tables. [#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv)) -* Fixed tsan report `destroy of a locked mutex`. [#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed TSan report on shutdown due to race condition in system logs usage. Fixed potential use-after-free on shutdown when part_log is enabled. [#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix recheck parts in `ReplicatedMergeTreeAlterThread` in case of error. [#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Always backquote column names in metadata. Otherwise it's impossible to create a table with column named `index` (server won't restart due to malformed `ATTACH` query in metadata). [#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix crash in `ALTER ... MODIFY ORDER BY` on `Distributed` table. [#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason)) -* Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014)) -* Fix bug with adding an extraneous row after consuming a protobuf message from Kafka. [#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix crash of `JOIN` on not-nullable vs nullable column. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) -* Fixed race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. 
[#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed data race when fetching data part that is already obsolete. [#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed rare data race that can happen during `RENAME` table of MergeTree family. [#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed segmentation fault in function `arrayIntersect`. Segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx)) -* Fixed reading from `Array(LowCardinality)` column in a rare case when the column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix `No message received` exception while fetching parts between replicas. [#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin)) -* Fixed `arrayIntersect` function wrong result in case of several repeated values in a single array. [#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fix incorrect result in `FULL/RIGHT JOIN` with const column. [#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix duplicates in `GLOBAL JOIN` with asterisk. [#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when column type is not specified. [#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin)) -* Functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix rare bug when setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in the column file. [#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin)) -* Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix wrong name qualification in `GLOBAL JOIN`. [#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix function `toISOWeek` result for year 1970. 
[#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix `DROP`, `TRUNCATE` and `OPTIMIZE` queries duplication when executed with `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin)) - -#### Backward Incompatible Change - -* Rename setting `insert_sample_with_metadata` to setting `input_format_defaults_for_omitted_fields`. [#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2)) -* Added setting `max_partitions_per_insert_block` (with value 100 by default). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended). [#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`). [#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1)) - -#### Performance Improvement - -* Optimize Volnitsky searcher by inlining, giving about 5-10% search improvement for queries with many needles or many similar bigrams. [#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1)) -* Fix performance issue when setting `use_uncompressed_cache` is greater than zero, which appeared when all read data was contained in the cache. [#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin)) - - -#### Build/Testing/Packaging Improvement - -* Hardening debug build: more granular memory mappings and ASLR; add memory protection for mark cache and index. This allows finding more memory stomping bugs in cases when ASan and MSan cannot do it. [#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add support for cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI` which allow enabling/disabling the above features (same as we can do for librdkafka, mysql, etc). [#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp)) -* Add ability to print process list and stacktraces of all threads if some queries are hung after a test run. [#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin)) -* Add retries on `Connection loss` error in `clickhouse-test`. [#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin)) -* Add freebsd build with vagrant and build with thread sanitizer to packager script. [#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin)) -* Now the user is asked for a password for user `'default'` during installation. [#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller)) -* Suppress warning in `rdkafka` library. [#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Allow building without SSL. 
[#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller)) -* Add a way to launch clickhouse-server image from a custom user. [#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid)) -* Upgrade contrib boost to 1.69. [#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller)) -* Disable usage of `mremap` when compiled with Thread Sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap`, `munmap`) that leads to false positives. Fixed TSan report in stateful tests. [#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add test checking using format schema via HTTP interface. [#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar)) - -## ClickHouse release 19.4 -### ClickHouse release 19.4.4.33, 2019-04-17 - -#### Bug Fixes - -* Avoid `std::terminate` in case of memory allocation failure. Now `std::bad_alloc` exception is thrown as expected. [#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixes capnproto reading from buffer. Sometimes files wasn't loaded successfully by HTTP. [#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs)) -* Fix error `Unknown log entry type: 0` after `OPTIMIZE TABLE FINAL` query. [#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird)) -* Wrong arguments to `hasAny` or `hasAll` functions may lead to segfault. [#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Deadlock may happen while executing `DROP DATABASE dictionary` query. [#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix undefined behavior in `median` and `quantile` functions. [#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz)) -* Fix compression level detection when `network_compression_method` in lowercase. Broken in v19.1. [#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller)) -* Fixed ignorance of `UTC` setting (fixes issue [#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller)) -* Fix `histogram` function behaviour with `Distributed` tables. [#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv)) -* Fixed tsan report `destroy of a locked mutex`. [#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed TSan report on shutdown due to race condition in system logs usage. Fixed potential use-after-free on shutdown when part_log is enabled. [#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix recheck parts in `ReplicatedMergeTreeAlterThread` in case of error. [#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). 
[#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Always backquote column names in metadata. Otherwise it's impossible to create a table with a column named `index` (server won't restart due to malformed `ATTACH` query in metadata). [#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix crash in `ALTER ... MODIFY ORDER BY` on `Distributed` table. [#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason)) -* Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014)) -* Fix bug with adding an extraneous row after consuming a protobuf message from Kafka. [#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) -* Fixed race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed data race when fetching data part that is already obsolete. [#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed rare data race that can happen during `RENAME` table of MergeTree family. [#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed segmentation fault in function `arrayIntersect`. Segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx)) -* Fixed reading from `Array(LowCardinality)` column in a rare case when the column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix `No message received` exception while fetching parts between replicas. [#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin)) -* Fixed `arrayIntersect` function wrong result in case of several repeated values in a single array. [#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when column type is not specified. [#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin)) -* Functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar)) -* Fix rare bug when setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in the column file. 
[#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin)) -* Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix function `toISOWeek` result for year 1970. [#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix `DROP`, `TRUNCATE` and `OPTIMIZE` queries duplication, when executed on `ON CLUSTER` for `ReplicatedMergeTree*` tables family. [#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin)) - -#### Improvements - -* Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn)) - -### ClickHouse release 19.4.3.11, 2019-04-02 - -#### Bug Fixes - -* Fix crash in `FULL/RIGHT JOIN` when we joining on nullable vs not nullable. [#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) - -#### Build/Testing/Packaging Improvement - -* Add a way to launch clickhouse-server image from a custom user. [#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid)) - -### ClickHouse release 19.4.2.7, 2019-03-30 - -#### Bug Fixes -* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -### ClickHouse release 19.4.1.3, 2019-03-19 - -#### Bug Fixes -* Fixed remote queries which contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used for remote query, `LIMIT` could happen before `LIMIT BY`, which led to too filtered result. [#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap)) - -### ClickHouse release 19.4.0.49, 2019-03-09 - -#### New Features -* Added full support for `Protobuf` format (input and output, nested data structures). [#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar)) -* Added bitmap functions with Roaring Bitmaps. [#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar)) -* Parquet format support. [#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller)) -* N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in R language. [#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1)) -* Combine rules for graphite rollup from dedicated aggregation and retention patterns. [#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)) -* Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added `min_execution_speed_bytes` setting to complement `min_execution_speed`. [#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014)) -* Implemented function `flatten`. [#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon)) -* Added functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (it's like `arrayEnumerateUniq` but allows fine-tuning the array depth to look inside multidimensional arrays). [#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Multiple JOINs with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/... [#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Bug Fixes -* This release also contains all bug fixes from 19.3 and 19.1. -* Fixed bug in data skipping indices: order of granules after INSERT was incorrect. [#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0)) -* Fixed `set` index for `Nullable` and `LowCardinality` columns. Previously, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Correctly set update_time on full `executable` dictionary update. [#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon)) -* Fix broken progress bar in 19.3. [#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov)) -* Fixed inconsistent values of MemoryTracker when memory region was shrunk, in certain cases. [#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed undefined behaviour in ThreadPool. [#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn)) -* ODBC driver compatibility with `LowCardinality` data type. [#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller)) -* FreeBSD: Fixup for `AIOcontextPool: Found io_event with unknown id 0` error. [#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef)) -* `system.part_log` table was created regardless of the configuration. [#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix undefined behaviour in `dictIsIn` function for cache dictionaries. [#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin)) -* Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. 
from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn)) -* Disable compile_expressions by default until we get our own `llvm` contrib and can test it with `clang` and `asan`. [#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin)) -* Prevent `std::terminate` when `invalidate_query` for `clickhouse` external dictionary source has returned a wrong result set (empty or more than one row or more than one column). Fixed issue when the `invalidate_query` was performed every five seconds regardless of the `lifetime`. [#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Avoid deadlock when the `invalidate_query` for a dictionary with `clickhouse` source was involving `system.dictionaries` table or `Dictionaries` database (rare case). [#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixes for CROSS JOIN with empty WHERE. [#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed segfault in function `replicate` when a constant argument is passed. [#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix lambda function with predicate optimizer. [#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014)) -* Multiple fixes for multiple JOINs. [#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Improvements -* Support aliases in JOIN ON section for right table columns. [#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2)) -* The result of multiple JOINs needs correct result names to be used in subselects. Replace flat aliases with source names in the result. [#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2)) -* Improve push-down logic for joined statements. [#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7)) - -#### Performance Improvements -* Improved heuristics of "move to PREWHERE" optimization. [#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Use proper lookup tables that use HashTable's API for 8-bit and 16-bit keys. [#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird)) -* Improved performance of string comparison. [#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Cleanup distributed DDL queue in a separate thread so that it doesn't slow down the main loop that processes distributed DDL tasks. [#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alex Zatelepin](https://github.com/ztlpn)) -* When `min_bytes_to_use_direct_io` is set to 1, not every file was opened with O_DIRECT mode because the data size to read was sometimes underestimated by the size of one compressed block. 
[#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvement -* Added support for clang-9 [#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix wrong `__asm__` instructions (again) [#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok)) -* Add ability to specify settings for `clickhouse-performance-test` from command line. [#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin)) -* Add dictionaries tests to integration tests. [#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin)) -* Added queries from the benchmark on the website to automated performance tests. [#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and the dependents have to link to it. [#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej)) -* Fixed a case when `quantileTiming` aggregate function can be called with negative or floating point argument (this fixes fuzz test with undefined behaviour sanitizer). [#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Spelling error correction. [#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2)) -* Fix compilation on Mac. [#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar)) -* Build fixes for FreeBSD and various unusual build configurations. [#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller)) - -## ClickHouse release 19.3 -### ClickHouse release 19.3.9.1, 2019-04-02 - -#### Bug Fixes - -* Fix crash in `FULL/RIGHT JOIN` when we joining on nullable vs not nullable. [#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2)) -* Fix segmentation fault in `clickhouse-copier`. [#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) -* Fixed reading from `Array(LowCardinality)` column in rare case when column contained a long sequence of empty arrays. [#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Build/Testing/Packaging Improvement - -* Add a way to launch clickhouse-server image from a custom user [#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid)) - - -### ClickHouse release 19.3.7, 2019-03-12 - -#### Bug fixes - -* Fixed error in #3920. This error manifests itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. This bug first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. 
[#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov)) - - -### ClickHouse release 19.3.6, 2019-03-02 - -#### Bug fixes - -* When there are more than 1000 threads in a thread pool, `std::terminate` may happen on thread exit. [Azat Khuzhin](https://github.com/azat) [#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Now it's possible to create `ReplicatedMergeTree*` tables with comments on columns without defaults and tables with column codecs without comments and defaults. Also fixed comparison of codecs. [#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin)) -* Fixed crash on JOIN with array or tuple. [#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed crash in clickhouse-copier with the message `ThreadStatus not created`. [#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed hangup on server shutdown if distributed DDLs were used. [#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alex Zatelepin](https://github.com/ztlpn)) -* Incorrect column numbers were printed in the error message about text format parsing for columns with numbers greater than 10. [#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvements - -* Fixed build with AVX enabled. [#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Enable extended accounting and IO accounting based on a known good version instead of the kernel under which it is compiled. [#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei)) -* Allow skipping the setting of core_dump.size_limit, warning instead of throwing if setting the limit fails. [#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller)) -* Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz)) - - -### ClickHouse release 19.3.5, 2019-02-21 - -#### Bug fixes -* Fixed bug with processing of large HTTP insert queries. [#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin)) -* Fixed backward incompatibility with old versions due to wrong implementation of `send_logs_level` setting. [#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed backward incompatibility of table function `remote` introduced with column comments. [#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.3.4, 2019-02-16 - -#### Improvements -* Table index size is not accounted for in memory limits when doing an `ATTACH TABLE` query. Avoided the possibility that a table cannot be attached after being detached. [#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Slightly raised the limit on max string and array size received from ZooKeeper. 
It allows to continue to work with increased size of `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` on ZooKeeper. [#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Allow to repair abandoned replica even if it already has huge number of nodes in its queue. [#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add one required argument to `SET` index (max stored rows number). [#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0)) - -#### Bug Fixes -* Fixed `WITH ROLLUP` result for group by single `LowCardinality` key. [#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fixed bug in the set index (dropping a granule if it contains more than `max_rows` rows). [#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0)) -* A lot of FreeBSD build fixes. [#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller)) -* Fixed aliases substitution in queries with subquery containing same alias (issue [#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Build/Testing/Packaging Improvements -* Add ability to run `clickhouse-server` for stateless tests in docker image. [#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk)) - -### ClickHouse release 19.3.3, 2019-02-13 - -#### New Features -* Added the `KILL MUTATION` statement that allows removing mutations that are for some reasons stuck. Added `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alex Zatelepin](https://github.com/ztlpn)) -* Added aggregate function `entropy` which computes Shannon entropy. [#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37)) -* Added ability to send queries `INSERT INTO tbl VALUES (....` to server without splitting on `query` and `data` parts. [#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin)) -* Generic implementation of `arrayWithConstant` function was added. [#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Implemented `NOT BETWEEN` comparison operator. [#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed)) -* Implement `sumMapFiltered` in order to be able to limit the number of keys for which values will be summed by `sumMap`. [#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) -* Added support of `Nullable` types in `mysql` table function. [#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin)) -* Support for arbitrary constant expressions in `LIMIT` clause. [#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box)) -* Added `topKWeighted` aggregate function that takes additional argument with (unsigned integer) weight. 
[#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman)) -* `StorageJoin` now supports `join_any_take_last_row` setting that allows overwriting existing values of the same key. [#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird)) -* Added function `toStartOfInterval`. [#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar)) -* Added `RowBinaryWithNamesAndTypes` format. [#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)) -* Added `IPv4` and `IPv6` data types. More efficient implementations of `IPv*` functions. [#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk)) -* Added function `toStartOfTenMinutes()`. [#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar)) -* Added `Protobuf` output format. [#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar)) -* Added brotli support for HTTP interface for data import (INSERTs). [#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin)) -* Added hints when the user makes a typo in a function name or type in the command line client. [#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1)) -* Added `Query-Id` to Server's HTTP Response header. [#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin)) - -#### Experimental features -* Added `minmax` and `set` data skipping indices for MergeTree table engine family. [#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0)) -* Added conversion of `CROSS JOIN` to `INNER JOIN` if possible. [#4221](https://github.com/ClickHouse/ClickHouse/pull/4221) [#4266](https://github.com/ClickHouse/ClickHouse/pull/4266) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Bug Fixes -* Fixed `Not found column` for duplicate columns in `JOIN ON` section. [#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2)) -* Make `START REPLICATED SENDS` command start replicated sends. [#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei)) -* Fixed aggregate functions execution with `Array(LowCardinality)` arguments. [#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed wrong behaviour when doing `INSERT ... SELECT ... FROM file(...)` query and file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed crash on dictionary reload if the dictionary is not available. This bug appeared in 19.1.6. [#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller)) -* Fixed `ALL JOIN` with duplicates in the right table. [#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed segmentation fault with `use_uncompressed_cache=1` and exception with wrong uncompressed size. This bug appeared in 19.1.6. 
[#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin)) -* Fixed `compile_expressions` bug with comparison of big (more than int16) dates. [#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin)) -* Fixed infinite loop when selecting from table function `numbers(0)`. [#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Temporarily disable predicate optimization for `ORDER BY`. [#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014)) -* Fixed `Illegal instruction` error when using base64 functions on old CPUs. This error has been reproduced only when ClickHouse was compiled with gcc-8. [#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed `No message received` error when interacting with PostgreSQL ODBC Driver through a TLS connection. Also fixes segfault when using MySQL ODBC Driver. [#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added generic case for function `if`. [#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* ClickHouse dictionaries now load within the `clickhouse` process. [#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed deadlock when `SELECT` from a table with `File` engine was retried after `No such file or directory` error. [#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed race condition when selecting from `system.tables` may give `table doesn't exist` error. [#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `clickhouse-client` can segfault on exit while loading data for command line suggestions if it was run in interactive mode. [#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fixed error: if there is a database with `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with ClickHouse source from localhost, the dictionary cannot load. [#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed an error when system logs were created again at server shutdown. [#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Correctly return the right type and properly handle locks in `joinGet` function. [#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird)) -* Added `sumMapWithOverflow` function. [#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) -* Fixed segfault with `allow_experimental_multiple_joins_emulation`. 
[52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed bug with incorrect `Date` and `DateTime` comparison. [#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey)) -* Fixed fuzz test under undefined behavior sanitizer: added parameter type check for `quantile*Weighted` family of functions. [#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed rare race condition when removing old data parts could fail with `File not found` error. [#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix install package with missing /etc/clickhouse-server/config.xml. [#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller)) - - -#### Build/Testing/Packaging Improvements -* Debian package: correct /etc/clickhouse-server/preprocessed link according to config. [#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller)) -* Various build fixes for FreeBSD. [#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller)) -* Added ability to create, fill and drop tables in perftest. [#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin)) -* Added a script to check for duplicate includes. [#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added ability to run queries by index in performance test. [#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin)) -* Package with debug symbols is suggested to be installed. [#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Refactoring of performance-test. Better logging and signal handling. [#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin)) -* Added docs to anonymized Yandex.Metrika datasets. [#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin)) -* Added tool for converting an old month-partitioned part to the custom-partitioned format. [#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn)) -* Added docs about two datasets in s3. [#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin)) -* Added script which creates changelog from pull request descriptions. [#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Added puppet module for ClickHouse. [#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov)) -* Added docs for a group of undocumented functions. [#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014)) -* ARM build fixes. 
[#4210](https://github.com/ClickHouse/ClickHouse/pull/4210) [#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller)) -* Dictionary tests are now able to run from `ctest`. [#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller)) -* Now `/etc/ssl` is used as the default directory with SSL certificates. [#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added checking of SSE and AVX instructions at startup. [#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99)) -* Init script will wait for the server to start. [#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller)) - -#### Backward Incompatible Changes -* Removed `allow_experimental_low_cardinality_type` setting. `LowCardinality` data types are production ready. [#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Reduce mark cache size and uncompressed cache size according to the available memory amount. [#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Lopatin Konstantin](https://github.com/k-lopatin)) -* Added keyword `INDEX` in `CREATE TABLE` query. A column with name `index` must be quoted with backticks or double quotes: `` `index` ``. [#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0)) -* `sumMap` now promotes the result type instead of overflowing. The old `sumMap` behavior can be obtained by using the `sumMapWithOverflow` function. [#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) - -#### Performance Improvements -* `std::sort` replaced by `pdqsort` for queries without `LIMIT`. [#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty)) -* Now the server reuses threads from the global thread pool. This affects performance in some corner cases. [#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvements -* Implemented AIO support for FreeBSD. [#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef)) -* `SELECT * FROM a JOIN b USING a, b` now returns `a` and `b` columns only from the left table. [#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2)) -* Allow `-C` option of client to work as `-c` option. [#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey)) -* Now option `--password` used without a value requires a password from stdin. [#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD_Conqueror](https://github.com/bsd-conqueror)) -* Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added cancelling of HTTP read-only queries if the client socket goes away. [#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei)) -* Now the server reports progress to keep client connections alive. 
[#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7)) -* Slightly better message with reason for OPTIMIZE query with `optimize_throw_if_noop` setting enabled. [#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added support of `--version` option for clickhouse server. [#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Lopatin Konstantin](https://github.com/k-lopatin)) -* Added `--help/-h` option to `clickhouse-server`. [#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuriy Baranov](https://github.com/yurriy)) -* Added support for scalar subqueries with aggregate function state result. [#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Improved server shutdown time and ALTERs waiting time. [#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added info about the replicated_can_become_leader setting to system.replicas and add logging if the replica won't try to become leader. [#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn)) - - -## ClickHouse release 19.1 -### ClickHouse release 19.1.14, 2019-03-14 - -* Fixed error `Column ... queried more than once` that may happen if the setting `asterisk_left_columns_only` is set to 1 in case of using `GLOBAL JOIN` with `SELECT *` (rare case). The issue does not exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2)) - -### ClickHouse release 19.1.13, 2019-03-12 - -This release contains exactly the same set of patches as 19.3.7. - -### ClickHouse release 19.1.10, 2019-03-03 - -This release contains exactly the same set of patches as 19.3.6. - - -## ClickHouse release 19.1 -### ClickHouse release 19.1.9, 2019-02-21 - -#### Bug fixes -* Fixed backward incompatibility with old versions due to wrong implementation of `send_logs_level` setting. [#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed backward incompatibility of table function `remote` introduced with column comments. [#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.1.8, 2019-02-16 - -#### Bug Fixes -* Fix install package with missing /etc/clickhouse-server/config.xml. [#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller)) - - -## ClickHouse release 19.1 -### ClickHouse release 19.1.7, 2019-02-15 - -#### Bug Fixes -* Correctly return the right type and properly handle locks in `joinGet` function. [#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird)) -* Fixed error when system logs are tried to create again at server shutdown. [#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed error: if there is a database with `Dictionary` engine, all dictionaries forced to load at server startup, and if there is a dictionary with ClickHouse source from localhost, the dictionary cannot load. 
[#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a bug where the execution of mutations containing `IN` operators produced incorrect results. [#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fixed a bug where `clickhouse-client` could segfault on exit while loading data for command line suggestions if it was run in interactive mode. [#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a race condition where selecting from `system.tables` could give a `table doesn't exist` error. [#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed an issue where local ClickHouse dictionaries were loaded via TCP instead of loading within the server process. [#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed the `No message received` error when interacting with the PostgreSQL ODBC Driver through a TLS connection. Also fixed a segfault when using the MySQL ODBC Driver. [#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Temporarily disabled predicate optimization for `ORDER BY`. [#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014)) -* Fixed an infinite loop when selecting from the table function `numbers(0)`. [#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a `compile_expressions` bug with comparison of big (larger than Int16) dates. [#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin)) -* Fixed a segmentation fault with `uncompressed_cache=1` and an exception with wrong uncompressed size. [#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin)) -* Fixed `ALL JOIN` with duplicates in the right table. [#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2)) -* Fixed wrong behaviour of `INSERT ... SELECT ... FROM file(...)` queries when the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed execution of aggregate functions with `Array(LowCardinality)` arguments. [#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Debian package: corrected the /etc/clickhouse-server/preprocessed link according to the config. [#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller)) -* Fixed a fuzz test under the undefined behavior sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Made the `START REPLICATED SENDS` command actually start replicated sends. 
[#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei)) -* Fixed `Not found column` for duplicate columns in JOIN ON section. [#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2)) -* Now `/etc/ssl` is used as default directory with SSL certificates. [#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed crash on dictionary reload if dictionary not available. [#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller)) -* Fixed bug with incorrect `Date` and `DateTime` comparison. [#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey)) -* Fixed incorrect result when `Date` and `DateTime` arguments are used in branches of conditional operator (function `if`). Added generic case for function `if`. [#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.1.6, 2019-01-24 - -#### New Features - -* Custom per column compression codecs for tables. [#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag)) -* Added compression codec `Delta`. [#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) -* Allow to `ALTER` compression codecs. [#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) -* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov)) -* Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) -* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1)) -* Pruning of unused shards if `SELECT` query filters by sharding key (setting `optimize_skip_unused_shards`). [#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7)) -* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7)) -* Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea)) -* Added hashing functions `xxHash64` and `xxHash32`. 
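A minimal sketch of the per-column compression codecs and the new hash functions listed above (the table, column names, and codec choices are hypothetical).

```sql
-- Per-column codecs, including the new Delta codec, can be declared in DDL
-- and changed later with ALTER.
CREATE TABLE metrics
(
    ts DateTime CODEC(Delta, LZ4),
    value Float64 CODEC(ZSTD)
)
ENGINE = MergeTree()
ORDER BY ts;

ALTER TABLE metrics MODIFY COLUMN value CODEC(ZSTD(5));

-- The new non-cryptographic hash functions:
SELECT xxHash64('ClickHouse') AS h64, xxHash32('ClickHouse') AS h32;
```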
[#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) -* Added the `gccMurmurHash` hashing function (GCC-flavoured Murmur hash), which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li)) -* Added hashing functions `javaHash`, `hiveHash`. [#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365)) -* Added the table function `remoteSecure`. It works like `remote`, but uses a secure connection. [#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller)) - -#### Experimental features - -* Added multiple JOINs emulation (`allow_experimental_multiple_joins_emulation` setting). [#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Bug Fixes - -* Made the `compiled_expression_cache_size` setting limited by default to lower memory consumption. [#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) -* Fixed a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. [#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fixed a bug where `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fixed a bug with a wrong prefix for IPv4 subnet masks. [#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin)) -* Fixed a crash (`std::terminate`) in rare cases when a new thread could not be created due to exhausted resources. [#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin)) -* Fixed a leak of netlink sockets. They were placed in a pool where they were never deleted, and new sockets were created at the start of a new thread when all current sockets were in use. [#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn)) -* Fixed a bug with closing the `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking the `odbc-bridge` subprocess. [#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin)) -* Fixed String to UInt monotonic conversion in case of usage of String in the primary key. 
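A quick sketch of the `remoteSecure` table function mentioned above; the host name is hypothetical, and 9440 is the conventional secure TCP port.

```sql
-- Works like remote(), but connects over TLS.
SELECT count()
FROM remoteSecure('replica.example.com:9440', system.one);
```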
[#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014)) -* Fixed error in calculation of integer conversion function monotonicity. [#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed segfault in `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix UB in StorageMerge. [#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird)) -* Fixed segfault in functions `addDays`, `subtractDays`. [#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed error: functions `round`, `floor`, `trunc`, `ceil` may return bogus result when executed on integer argument and large negative scale. [#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed a bug induced by 'kill query sync' which leads to a core dump. [#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx)) -* Fix bug with long delay after empty replication queue. [#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin)) -* Fixed excessive memory usage in case of inserting into table with `LowCardinality` primary key. [#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed `LowCardinality` serialization for `Native` format in case of empty arrays. [#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed incorrect result while using distinct by single LowCardinality numeric column. [#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed specialized aggregation with LowCardinality key (in case when `compile` setting is enabled). [#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fix user and password forwarding for replicated tables queries. [#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu)) -* Fixed very rare race condition that can happen when listing tables in Dictionary database while reloading dictionaries. [#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed incorrect result when HAVING was used with ROLLUP or CUBE. [#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection)) -* Fixed column aliases for query with `JOIN ON` syntax and distributed tables. [#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014)) -* Fixed error in internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use ClickHouse codebase as a library directly. 
[#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvements - -* Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea)) -* Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* `CapnProtoInputStream` now support jagged structures. [#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer)) -* Usability improvement: added a check that server process is started from the data directory's owner. Do not allow to start server from root if the data belongs to non-root user. [#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev)) -* Better logic of checking required columns during analysis of queries with JOINs. [#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2)) -* Decreased the number of connections in case of large number of Distributed tables in a single server. [#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014)) -* Supported totals row for `WITH TOTALS` query for ODBC driver. [#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb)) -* Allowed to use `Enum`s as integers inside if function. [#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7)) -* Added `low_cardinality_allow_in_native_format` setting. If disabled, do not use `LowCadrinality` type in `Native` format. [#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Removed some redundant objects from compiled expressions cache to lower memory usage. [#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin)) -* Add check that `SET send_logs_level = 'value'` query accept appropriate value. [#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx)) -* Fixed data type check in type conversion functions. [#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014)) - -#### Performance Improvements - -* Add a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. [#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn)) -* Add an DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case pattern doesn't contain time. [#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) -* Performance improvement for integer numbers serialization. 
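Two small sketches of the improvements listed above: the `IF [NOT] EXISTS` clauses in `ALTER TABLE` and the extended `parseDateTimeBestEffort` formats (the table and column names are hypothetical).

```sql
-- Idempotent column management:
ALTER TABLE hits ADD COLUMN IF NOT EXISTS referrer String DEFAULT '';
ALTER TABLE hits DROP COLUMN IF EXISTS referrer;

-- DD.MM.YYYY-style input is now understood:
SELECT parseDateTimeBestEffort('24.01.2019');
```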
[#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird)) -* Zero left padding PODArray so that -1 element is always valid and zeroed. It's used for branchless calculation of offsets. [#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird)) -* Reverted `jemalloc` version which lead to performance degradation. [#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Backward Incompatible Changes - -* Removed undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn)) -* Removed function `shardByHash`. [#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Forbid using scalar subqueries with result of type `AggregateFunction`. [#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7)) - -#### Build/Testing/Packaging Improvements - -* Added support for PowerPC (`ppc64le`) build. [#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1)) -* Stateful functional tests are run on public available dataset. [#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Updated `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of raw C interface. [#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7)) -* Updated `mariadb-client` library. Fixed one of issues found by UBSan. [#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Some fixes for UBSan builds. [#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added per-commit runs of tests with UBSan build. -* Added per-commit runs of PVS-Studio static analyzer. -* Fixed bugs found by PVS-Studio. [#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed glibc compatibility issues. [#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Move Docker images to 18.10 and add compatibility file for glibc >= 2.28 [#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin)) -* Add env variable if user don't want to chown directories in server Docker image. [#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin)) -* Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added a few more warnings that are available only in clang 8. 
[#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej)) -* Added sanitizer variables for test images. [#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin)) -* `clickhouse-server` debian package will recommend `libcap2-bin` package to use `setcap` tool for setting capabilities. This is optional. [#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Improved compilation time, fixed includes. [#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller)) -* Added performance tests for hash functions. [#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov)) -* Fixed cyclic library dependences. [#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller)) -* Improved compilation with low available memory. [#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller)) -* Added test script to reproduce performance degradation in `jemalloc`. [#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed misspells in comments and string literals under `dbms`. [#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) -* Fixed typos in comments. [#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty)) - - -## ClickHouse release 18.16 -### ClickHouse release 18.16.1, 2018-12-21 - -#### Bug fixes: - -* Fixed an error that led to problems with updating dictionaries with the ODBC source. [#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) -* JIT compilation of aggregate functions now works with LowCardinality columns. [#3838](https://github.com/ClickHouse/ClickHouse/issues/3838) - -#### Improvements: - -* Added the `low_cardinality_allow_in_native_format` setting (enabled by default). When disabled, LowCardinality columns will be converted to ordinary columns for SELECT queries and ordinary columns will be expected for INSERT queries. [#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) - -#### Build improvements: - -* Fixes for builds on macOS and ARM. - -### ClickHouse release 18.16.0, 2018-12-14 - -#### New features: - -* `DEFAULT` expressions are evaluated for missing fields when loading data in semi-structured input formats (`JSONEachRow`, `TSKV`). The feature is enabled with the `insert_sample_with_metadata` setting. [#3555](https://github.com/ClickHouse/ClickHouse/pull/3555) -* The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column. This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [#3755](https://github.com/ClickHouse/ClickHouse/pull/3755) -* For tables in the `MergeTree` family, now you can specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). 
The sorting key can be longer than the index. [#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) -* Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/3617) -* Added functions for working with base64: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3350) -* Now you can use a parameter to configure the precision of the `uniqCombined` aggregate function (select the number of HyperLogLog cells). [#3406](https://github.com/ClickHouse/ClickHouse/pull/3406) -* Added the `system.contributors` table that contains the names of everyone who made commits in ClickHouse. [#3452](https://github.com/ClickHouse/ClickHouse/pull/3452) -* Added the ability to omit the partition for the `ALTER TABLE ... FREEZE` query in order to back up all partitions at once. [#3514](https://github.com/ClickHouse/ClickHouse/pull/3514) -* Added `dictGet` and `dictGetOrDefault` functions that don't require specifying the type of return value. The type is determined automatically from the dictionary description. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3564) -* Now you can specify comments for a column in the table description and change it using `ALTER`. [#3377](https://github.com/ClickHouse/ClickHouse/pull/3377) -* Reading is supported for `Join` type tables with simple keys. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -* Now you can specify the options `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`, and `join_overflow_mode` when creating a `Join` type table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -* Added the `joinGet` function that allows you to use a `Join` type table like a dictionary. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -* Added the `partition_key`, `sorting_key`, `primary_key`, and `sampling_key` columns to the `system.tables` table in order to provide information about table keys. [#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) -* Added the `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`, and `is_in_sampling_key` columns to the `system.columns` table. [#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) -* Added the `min_time` and `max_time` columns to the `system.parts` table. These columns are populated when the partitioning key is an expression consisting of `DateTime` columns. [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800) - -#### Bug fixes: - -* Fixes and performance improvements for the `LowCardinality` data type. `GROUP BY` using `LowCardinality(Nullable(...))`. Getting the values of `extremes`. Processing high-order functions. `LEFT ARRAY JOIN`. Distributed `GROUP BY`. Functions that return `Array`. Execution of `ORDER BY`. Writing to `Distributed` tables (nicelulu). Backward compatibility for `INSERT` queries from old clients that implement the `Native` protocol. Support for `LowCardinality` for `JOIN`. Improved performance when working in a single stream. 
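A minimal sketch of reading from a `Join` table and of the `joinGet` function described above (all names and values are hypothetical).

```sql
-- A Join-engine table keeps the right-hand side of a JOIN in memory
-- and can now be queried directly or used as a key-value lookup.
CREATE TABLE currency_rates
(
    code String,
    rate Float64
)
ENGINE = Join(ANY, LEFT, code);

INSERT INTO currency_rates VALUES ('EUR', 1.13), ('GBP', 1.27);

SELECT joinGet('currency_rates', 'rate', 'EUR') AS eur_rate;
```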
[#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [#3518](https://github.com/ClickHouse/ClickHouse/pull/3518) -* Fixed how the `select_sequential_consistency` option works. Previously, when this setting was enabled, an incomplete result was sometimes returned after beginning to write to a new partition. [#2863](https://github.com/ClickHouse/ClickHouse/pull/2863) -* Databases are correctly specified when executing DDL `ON CLUSTER` queries and `ALTER UPDATE/DELETE`. [#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) -* Databases are correctly specified for subqueries inside a VIEW. [#3521](https://github.com/ClickHouse/ClickHouse/pull/3521) -* Fixed a bug in `PREWHERE` with `FINAL` for `VersionedCollapsingMergeTree`. [7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1) -* Now you can use `KILL QUERY` to cancel queries that have not started yet because they are waiting for the table to be locked. [#3517](https://github.com/ClickHouse/ClickHouse/pull/3517) -* Corrected date and time calculations if the clocks were moved back at midnight (this happens in Iran, and happened in Moscow from 1981 to 1983). Previously, this led to the time being reset a day earlier than necessary, and also caused incorrect formatting of the date and time in text format. [#3819](https://github.com/ClickHouse/ClickHouse/pull/3819) -* Fixed bugs in some cases of `VIEW` and subqueries that omit the database. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3521) -* Fixed a race condition when simultaneously reading from a `MATERIALIZED VIEW` and deleting a `MATERIALIZED VIEW` due to not locking the internal `MATERIALIZED VIEW`. [#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [#3694](https://github.com/ClickHouse/ClickHouse/pull/3694) -* Fixed the error `Lock handler cannot be nullptr.` [#3689](https://github.com/ClickHouse/ClickHouse/pull/3689) -* Fixed query processing when the `compile_expressions` option is enabled (it's enabled by default). Nondeterministic constant expressions like the `now` function are no longer unfolded. [#3457](https://github.com/ClickHouse/ClickHouse/pull/3457) -* Fixed a crash when specifying a non-constant scale argument in `toDecimal32/64/128` functions. -* Fixed an error when trying to insert an array with `NULL` elements in the `Values` format into a column of type `Array` without `Nullable` (if `input_format_values_interpret_expressions` = 1). [#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [#3503](https://github.com/ClickHouse/ClickHouse/pull/3503) -* Fixed continuous error logging in `DDLWorker` if ZooKeeper is not available. 
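A sketch of the `KILL QUERY` behaviour referenced above (the `query_id` and user are hypothetical); it can now also cancel queries that are still waiting for a table lock.

```sql
KILL QUERY WHERE query_id = 'a3b2c1d0-0000-0000-0000-000000000001';

-- SYNC waits until the target queries have actually stopped.
KILL QUERY WHERE user = 'report_user' SYNC;
```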
[8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2) -* Fixed the return type for `quantile*` functions from `Date` and `DateTime` types of arguments. [#3580](https://github.com/ClickHouse/ClickHouse/pull/3580) -* Fixed the `WITH` clause if it specifies a simple alias without expressions. [#3570](https://github.com/ClickHouse/ClickHouse/pull/3570) -* Fixed processing of queries with named sub-queries and qualified column names when `enable_optimize_predicate_expression` is enabled. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3588) -* Fixed the error `Attempt to attach to nullptr thread group` when working with materialized views. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623) -* Fixed a crash when passing certain incorrect arguments to the `arrayReverse` function. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) -* Fixed the buffer overflow in the `extractURLParameter` function. Improved performance. Added correct processing of strings containing zero bytes. [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5) -* Fixed buffer overflow in the `lowerUTF8` and `upperUTF8` functions. Removed the ability to execute these functions over `FixedString` type arguments. [#3662](https://github.com/ClickHouse/ClickHouse/pull/3662) -* Fixed a rare race condition when deleting `MergeTree` tables. [#3680](https://github.com/ClickHouse/ClickHouse/pull/3680) -* Fixed a race condition when reading from `Buffer` tables and simultaneously performing `ALTER` or `DROP` on the target tables. [#3719](https://github.com/ClickHouse/ClickHouse/pull/3719) -* Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) - -#### Improvements: - -* The server does not write the processed configuration files to the `/etc/clickhouse-server/` directory. Instead, it saves them in the `preprocessed_configs` directory inside `path`. This means that the `/etc/clickhouse-server/` directory doesn't have write access for the `clickhouse` user, which improves security. [#2443](https://github.com/ClickHouse/ClickHouse/pull/2443) -* The `min_merge_bytes_to_use_direct_io` option is set to 10 GiB by default. A merge that forms large parts of tables from the MergeTree family will be performed in `O_DIRECT` mode, which prevents excessive page cache eviction. [#3504](https://github.com/ClickHouse/ClickHouse/pull/3504) -* Accelerated server start when there is a very large number of tables. [#3398](https://github.com/ClickHouse/ClickHouse/pull/3398) -* Added a connection pool and HTTP `Keep-Alive` for connections between replicas. [#3594](https://github.com/ClickHouse/ClickHouse/pull/3594) -* If the query syntax is invalid, the `400 Bad Request` code is returned in the `HTTP` interface (500 was returned previously). [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab) -* The `join_default_strictness` option is set to `ALL` by default for compatibility. [120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe) -* Removed logging to `stderr` from the `re2` library for invalid or complex regular expressions. 
[#3723](https://github.com/ClickHouse/ClickHouse/pull/3723) -* Added for the `Kafka` table engine: checks for subscriptions before beginning to read from Kafka; the kafka_max_block_size setting for the table. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396) -* The `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`, and `murmurHash3_64` functions now work for any number of arguments and for arguments in the form of tuples. [#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [#3519](https://github.com/ClickHouse/ClickHouse/pull/3519) -* The `arrayReverse` function now works with any types of arrays. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) -* Added an optional parameter: the slot size for the `timeSlots` function. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3724) -* For `FULL` and `RIGHT JOIN`, the `max_block_size` setting is used for a stream of non-joined data from the right table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3699) -* Added the `--secure` command line parameter in `clickhouse-benchmark` and `clickhouse-performance-test` to enable TLS. [#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [#3690](https://github.com/ClickHouse/ClickHouse/pull/3690) -* Type conversion when the structure of a `Buffer` type table does not match the structure of the destination table. [Vitaly Baranov](https://github.com/ClickHouse/ClickHouse/pull/3603) -* Added the `tcp_keep_alive_timeout` option to enable keep-alive packets after inactivity for the specified time interval. [#3441](https://github.com/ClickHouse/ClickHouse/pull/3441) -* Removed unnecessary quoting of values for the partition key in the `system.parts` table if it consists of a single column. [#3652](https://github.com/ClickHouse/ClickHouse/pull/3652) -* The modulo function works for `Date` and `DateTime` data types. [#3385](https://github.com/ClickHouse/ClickHouse/pull/3385) -* Added synonyms for the `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`, and `MID` functions. [#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) Some function names are case-insensitive for compatibility with the SQL standard. Added syntactic sugar `SUBSTRING(expr FROM start FOR length)` for compatibility with SQL. [#3804](https://github.com/ClickHouse/ClickHouse/pull/3804) -* Added the ability to `mlock` memory pages corresponding to `clickhouse-server` executable code to prevent it from being forced out of memory. This feature is disabled by default. [#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) -* Improved performance when reading from `O_DIRECT` (with the `min_bytes_to_use_direct_io` option enabled). [#3405](https://github.com/ClickHouse/ClickHouse/pull/3405) -* Improved performance of the `dictGet...OrDefault` function for a constant key argument and a non-constant default argument. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3563) -* The `firstSignificantSubdomain` function now processes the domains `gov`, `mil`, and `edu`. [Igor Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) Improved performance. [#3628](https://github.com/ClickHouse/ClickHouse/pull/3628) -* Ability to specify custom environment variables for starting `clickhouse-server` using the `SYS-V init.d` script by defining `CLICKHOUSE_PROGRAM_ENV` in `/etc/default/clickhouse`. 
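An illustration of the SQL-compatibility synonyms and the `SUBSTRING(expr FROM start FOR length)` sugar added above.

```sql
SELECT
    SUBSTRING('ClickHouse' FROM 6 FOR 5) AS tail,       -- 'House'
    LCASE('ClickHouse') AS lower_name,                  -- 'clickhouse'
    REPLACE('ClickHouse', 'Click', 'Lake') AS renamed;  -- 'LakeHouse'
```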
-[Pavlo Bashynskyi](https://github.com/ClickHouse/ClickHouse/pull/3612) -* Correct return code for the clickhouse-server init script. [#3516](https://github.com/ClickHouse/ClickHouse/pull/3516) -* The `system.metrics` table now has the `VersionInteger` metric, and `system.build_options` has the added line `VERSION_INTEGER`, which contains the numeric form of the ClickHouse version, such as `18016000`. [#3644](https://github.com/ClickHouse/ClickHouse/pull/3644) -* Removed the ability to compare the `Date` type with a number to avoid potential errors like `date = 2018-12-17`, where quotes around the date are omitted by mistake. [#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) -* Fixed the behavior of stateful functions like `rowNumberInAllBlocks`. They previously output a result that was one number larger due to starting during query analysis. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3729) -* If the `force_restore_data` file can't be deleted, an error message is displayed. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3794) - -#### Build improvements: - -* Updated the `jemalloc` library, which fixes a potential memory leak. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3557) -* Profiling with `jemalloc` is enabled by default in order to debug builds. [2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15) -* Added the ability to run integration tests when only `Docker` is installed on the system. [#3650](https://github.com/ClickHouse/ClickHouse/pull/3650) -* Added the fuzz expression test in SELECT queries. [#3442](https://github.com/ClickHouse/ClickHouse/pull/3442) -* Added a stress test for commits, which performs functional tests in parallel and in random order to detect more race conditions. [#3438](https://github.com/ClickHouse/ClickHouse/pull/3438) -* Improved the method for starting clickhouse-server in a Docker image. [Elghazal Ahmed](https://github.com/ClickHouse/ClickHouse/pull/3663) -* For a Docker image, added support for initializing databases using files in the `/docker-entrypoint-initdb.d` directory. [Konstantin Lebedev](https://github.com/ClickHouse/ClickHouse/pull/3695) -* Fixes for builds on ARM. [#3709](https://github.com/ClickHouse/ClickHouse/pull/3709) - -#### Backward incompatible changes: - -* Removed the ability to compare the `Date` type with a number. Instead of `toDate('2018-12-18') = 17883`, you must use explicit type conversion `= toDate(17883)` [#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) - -## ClickHouse release 18.14 -### ClickHouse release 18.14.19, 2018-12-19 - -#### Bug fixes: - -* Fixed an error that led to problems with updating dictionaries with the ODBC source. [#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) -* Databases are correctly specified when executing DDL `ON CLUSTER` queries. [#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) -* Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) - -#### Build improvements: - -* Fixes for builds on ARM. - -### ClickHouse release 18.14.18, 2018-12-04 - -#### Bug fixes: -* Fixed error in `dictGet...` function for dictionaries of type `range`, if one of the arguments is constant and other is not. 
[#3751](https://github.com/ClickHouse/ClickHouse/pull/3751) -* Fixed error that caused messages `netlink: '...': attribute type 1 has an invalid length` to be printed in Linux kernel log, that was happening only on fresh enough versions of Linux kernel. [#3749](https://github.com/ClickHouse/ClickHouse/pull/3749) -* Fixed segfault in function `empty` for argument of `FixedString` type. [Daniel, Dao Quang Minh](https://github.com/ClickHouse/ClickHouse/pull/3703) -* Fixed excessive memory allocation when using large value of `max_query_size` setting (a memory chunk of `max_query_size` bytes was preallocated at once). [#3720](https://github.com/ClickHouse/ClickHouse/pull/3720) - -#### Build changes: -* Fixed build with LLVM/Clang libraries of version 7 from the OS packages (these libraries are used for runtime query compilation). [#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) - -### ClickHouse release 18.14.17, 2018-11-30 - -#### Bug fixes: -* Fixed cases when the ODBC bridge process did not terminate with the main server process. [#3642](https://github.com/ClickHouse/ClickHouse/pull/3642) -* Fixed synchronous insertion into the `Distributed` table with a columns list that differs from the column list of the remote table. [#3673](https://github.com/ClickHouse/ClickHouse/pull/3673) -* Fixed a rare race condition that can lead to a crash when dropping a MergeTree table. [#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) -* Fixed a query deadlock in case when query thread creation fails with the `Resource temporarily unavailable` error. [#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) -* Fixed parsing of the `ENGINE` clause when the `CREATE AS table` syntax was used and the `ENGINE` clause was specified before the `AS table` (the error resulted in ignoring the specified engine). [#3692](https://github.com/ClickHouse/ClickHouse/pull/3692) - -### ClickHouse release 18.14.15, 2018-11-21 - -#### Bug fixes: -* The size of memory chunk was overestimated while deserializing the column of type `Array(String)` that leads to "Memory limit exceeded" errors. The issue appeared in version 18.12.13. [#3589](https://github.com/ClickHouse/ClickHouse/issues/3589) - -### ClickHouse release 18.14.14, 2018-11-20 - -#### Bug fixes: -* Fixed `ON CLUSTER` queries when cluster configured as secure (flag ``). [#3599](https://github.com/ClickHouse/ClickHouse/pull/3599) - -#### Build changes: -* Fixed problems (llvm-7 from system, macos) [#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) - -### ClickHouse release 18.14.13, 2018-11-08 - -#### Bug fixes: -* Fixed the `Block structure mismatch in MergingSorted stream` error. [#3162](https://github.com/ClickHouse/ClickHouse/issues/3162) -* Fixed `ON CLUSTER` queries in case when secure connections were turned on in the cluster config (the `` flag). [#3465](https://github.com/ClickHouse/ClickHouse/pull/3465) -* Fixed an error in queries that used `SAMPLE`, `PREWHERE` and alias columns. [#3543](https://github.com/ClickHouse/ClickHouse/pull/3543) -* Fixed a rare `unknown compression method` error when the `min_bytes_to_use_direct_io` setting was enabled. [3544](https://github.com/ClickHouse/ClickHouse/pull/3544) - -#### Performance improvements: -* Fixed performance regression of queries with `GROUP BY` of columns of UInt16 or Date type when executing on AMD EPYC processors. [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512) -* Fixed performance regression of queries that process long strings. 
[#3530](https://github.com/ClickHouse/ClickHouse/pull/3530) - -#### Build improvements: -* Improvements for simplifying the Arcadia build. [#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [#3535](https://github.com/ClickHouse/ClickHouse/pull/3535) - -### ClickHouse release 18.14.12, 2018-11-02 - -#### Bug fixes: - -* Fixed a crash on joining two unnamed subqueries. [#3505](https://github.com/ClickHouse/ClickHouse/pull/3505) -* Fixed generating incorrect queries (with an empty `WHERE` clause) when querying external databases. [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477) -* Fixed using an incorrect timeout value in ODBC dictionaries. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511) - -### ClickHouse release 18.14.11, 2018-10-29 - -#### Bug fixes: - -* Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [#2156](https://github.com/ClickHouse/ClickHouse/issues/2156) -* Fixed errors when merging data in tables containing arrays inside Nested structures. [#3397](https://github.com/ClickHouse/ClickHouse/pull/3397) -* Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [#3429](https://github.com/ClickHouse/ClickHouse/pull/3429) -* Fixed an error on inserts to a Distributed table in Native format. [#3411](https://github.com/ClickHouse/ClickHouse/issues/3411) - -### ClickHouse release 18.14.10, 2018-10-23 - -* The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [#3410](https://github.com/ClickHouse/ClickHouse/pull/3410) -* The `enable_optimize_predicate_expression` setting is disabled by default. - -### ClickHouse release 18.14.9, 2018-10-16 - -#### New features: - -* The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available). [#3172](https://github.com/ClickHouse/ClickHouse/pull/3172) -* Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770) -* Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) -* Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [#3146](https://github.com/ClickHouse/ClickHouse/pull/3146) -* Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) -* Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/ClickHouse/ClickHouse/pull/3144) -* Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208) -* Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`) [#3321](https://github.com/ClickHouse/ClickHouse/pull/3321) -* Now you can use pre-defined `database` and `table` macros when declaring `Replicated` tables. [#3251](https://github.com/ClickHouse/ClickHouse/pull/3251) -* Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). 
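Small sketches of the `WITH CUBE` modifier, `formatDateTime`, and the ISO-week helpers announced above (the `visits` table and its columns are hypothetical).

```sql
SELECT
    region,
    device,
    count() AS hits
FROM visits
GROUP BY region, device WITH CUBE;

SELECT
    formatDateTime(now(), '%Y-%m-%d') AS formatted,
    toISOWeek(today()) AS iso_week,
    toISOYear(today()) AS iso_year;
```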
[#3153](https://github.com/ClickHouse/ClickHouse/pull/3153) - -#### Experimental features: - -* Optimization of the GROUP BY clause for `LowCardinality data types.` [#3138](https://github.com/ClickHouse/ClickHouse/pull/3138) -* Optimized calculation of expressions for `LowCardinality data types.` [#3200](https://github.com/ClickHouse/ClickHouse/pull/3200) - -#### Improvements: - -* Significantly reduced memory consumption for queries with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) -* In the absence of `JOIN` (`LEFT`, `INNER`, ...), `INNER JOIN` is assumed. [#3147](https://github.com/ClickHouse/ClickHouse/pull/3147) -* Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3202) -* The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) -* The `compile_expressions` setting (JIT compilation of expressions) is enabled by default. -* Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [#3101](https://github.com/ClickHouse/ClickHouse/pull/3101) -* LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) -* Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) -* Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3257). -* `ON CLUSTER` can be specified for `ALTER UPDATE` queries. -* Improved performance for reading data in `JSONEachRow` format. [#3332](https://github.com/ClickHouse/ClickHouse/pull/3332) -* Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive. [#3306](https://github.com/ClickHouse/ClickHouse/pull/3306) -* Added the `TIMESTAMP` synonym for the `DateTime` type. [#3390](https://github.com/ClickHouse/ClickHouse/pull/3390) -* There is always space reserved for query_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools. -* Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) -* Added compatibility mode for the case when the client library that uses the Native protocol sends fewer columns by mistake than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [#3171](https://github.com/ClickHouse/ClickHouse/pull/3171) -* In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). 
This is useful if the partitioning scheme changes during copying, but only changes slightly. [#3166](https://github.com/ClickHouse/ClickHouse/pull/3166) -* The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). -* Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto format`. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216) -* The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219) -* In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [#3330](https://github.com/ClickHouse/ClickHouse/pull/3330) -* Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264) -* Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [#3281](https://github.com/ClickHouse/ClickHouse/pull/3281) -* Support for the `Decimal` data type in external dictionaries. [#3324](https://github.com/ClickHouse/ClickHouse/pull/3324) -* Support for the `Decimal` data type in `SummingMergeTree` tables. [#3348](https://github.com/ClickHouse/ClickHouse/pull/3348) -* Added specializations for `UUID` in `if`. [#3366](https://github.com/ClickHouse/ClickHouse/pull/3366) -* Reduced the number of `open` and `close` system calls when reading from a `MergeTree table`. [#3283](https://github.com/ClickHouse/ClickHouse/pull/3283) -* A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3375) - -#### Bug fixes: - -* Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [#1702](https://github.com/ClickHouse/ClickHouse/pull/1702) -* Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) -* Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [#3341](https://github.com/ClickHouse/ClickHouse/pull/3341) -* If after merging data parts, the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [#3194](https://github.com/ClickHouse/ClickHouse/pull/3194) -* Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. 
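A one-line sketch of the extended `sumMap` key support mentioned above (`Date` keys here; `DateTime`, `UUID`, and `Decimal` keys work the same way).

```sql
SELECT sumMap([toDate('2018-09-16'), toDate('2018-09-17')], [3, 5]) AS per_day;
```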
[Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344) -* Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...` This error occurred in version 18.12.13. [#3247](https://github.com/ClickHouse/ClickHouse/pull/3247) -* Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the query if the `JOIN` is only performed on remote servers. [#3340](https://github.com/ClickHouse/ClickHouse/pull/3340) -* Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). -* For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150) -* If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses to connect another server. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9) -* If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29) -* Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [#3229](https://github.com/ClickHouse/ClickHouse/pull/3229) -* Corrected type conversion between `Decimal` and integer numbers. [#3211](https://github.com/ClickHouse/ClickHouse/pull/3211) -* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3231) -* Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;` [#3155](https://github.com/ClickHouse/ClickHouse/pull/3155) -* Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163) -* Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [#3165](https://github.com/ClickHouse/ClickHouse/pull/3165) -* Bug fixes in the `ALTER UPDATE` query. -* Fixed bugs in the `odbc` table function that appeared in version 18.12. [#3197](https://github.com/ClickHouse/ClickHouse/pull/3197) -* Fixed the operation of aggregate functions with `StateArray` combinators. [#3188](https://github.com/ClickHouse/ClickHouse/pull/3188) -* Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179) -* Fixed output of types for operations using `Decimal` and integer arguments. [#3224](https://github.com/ClickHouse/ClickHouse/pull/3224) -* Fixed the segfault during `GROUP BY` on `Decimal128`. 
[3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a) -* The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [#3241](https://github.com/ClickHouse/ClickHouse/pull/3241) -* Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664) -* Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [#3254](https://github.com/ClickHouse/ClickHouse/pull/3254) -* Fixed handling of substitutions in `clickhouse-performance-test`, if the query contains only part of the substitutions declared in the test. [#3263](https://github.com/ClickHouse/ClickHouse/pull/3263) -* Fixed an error when using `FINAL` with `PREWHERE`. [#3298](https://github.com/ClickHouse/ClickHouse/pull/3298) -* Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [#3298](https://github.com/ClickHouse/ClickHouse/pull/3298) -* Added a check for the absence of `arrayJoin` for `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [#3337](https://github.com/ClickHouse/ClickHouse/pull/3337) -* Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [#3357](https://github.com/ClickHouse/ClickHouse/pull/3357) -* Fixed segfault that could occur in rare cases after optimization that replaced AND chains from equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/ClickHouse/ClickHouse/pull/3339) -* Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [#3352](https://github.com/ClickHouse/ClickHouse/pull/3352) - -#### Backward incompatible changes: - -* Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for default use. [#3329](https://github.com/ClickHouse/ClickHouse/pull/3329) - -## ClickHouse release 18.12 - -### ClickHouse release 18.12.17, 2018-09-16 - -#### New features: - -* `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [#3126](https://github.com/ClickHouse/ClickHouse/pull/3126) -* Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. 
[Vasily Nemkov](https://github.com/ClickHouse/ClickHouse/pull/3123) -* The `Decimal` type now supports `var*` and `stddev*` aggregate functions. [#3129](https://github.com/ClickHouse/ClickHouse/pull/3129) -* The `Decimal` type now supports mathematical functions (`exp`, `sin` and so on.) [#3129](https://github.com/ClickHouse/ClickHouse/pull/3129) -* The `system.part_log` table now has the `partition_id` column. [#3089](https://github.com/ClickHouse/ClickHouse/pull/3089) - -#### Bug fixes: - -* `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3159) -* Fixed incompatibility (unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [#3130](https://github.com/ClickHouse/ClickHouse/pull/3130) -* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107) -* Fixed a minor issue with backwards compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [#3122](https://github.com/ClickHouse/ClickHouse/pull/3122) - -#### Backward incompatible changes: - -* The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107) - -### ClickHouse release 18.12.14, 2018-09-13 - -#### New features: - -* Added support for `ALTER UPDATE` queries. [#3035](https://github.com/ClickHouse/ClickHouse/pull/3035) -* Added the `allow_ddl` option, which restricts the user's access to DDL queries. [#3104](https://github.com/ClickHouse/ClickHouse/pull/3104) -* Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using O_DIRECT). [#3117](https://github.com/ClickHouse/ClickHouse/pull/3117) -* The `system.merges` system table now contains the `partition_id` column. [#3099](https://github.com/ClickHouse/ClickHouse/pull/3099) - -#### Improvements - -* If a data part remains unchanged during mutation, it isn't downloaded by replicas. [#3103](https://github.com/ClickHouse/ClickHouse/pull/3103) -* Autocomplete is available for names of settings when working with `clickhouse-client`. [#3106](https://github.com/ClickHouse/ClickHouse/pull/3106) - -#### Bug fixes: - -* Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [#3118](https://github.com/ClickHouse/ClickHouse/pull/3118) -* Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13. -* Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3098) -* Fixed an error in aggregate functions for arrays that can have `NULL` elements. 
[Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3097) - - -### ClickHouse release 18.12.13, 2018-09-10 - -#### New features: - -* Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type`. [#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [#3047](https://github.com/ClickHouse/ClickHouse/pull/3047) -* New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`). [#2948](https://github.com/ClickHouse/ClickHouse/pull/2948) -* In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2787) -* Added support for JOIN with table functions. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907) -* Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/ClickHouse/ClickHouse/pull/2447) -* Ctrl+C in clickhouse-client clears a query that was entered. [#2877](https://github.com/ClickHouse/ClickHouse/pull/2877) -* Added the `join_default_strictness` setting (values: `"`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [#2982](https://github.com/ClickHouse/ClickHouse/pull/2982) -* Each line of the server log related to query processing shows the query ID. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* Added `ProfileEvents`counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* Added the `system.query_thread_log` table, which contains information about each query execution thread. 
Added the `log_query_threads` setting. [#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) -* The `system.metrics` and `system.events` tables now have built-in documentation. [#3016](https://github.com/ClickHouse/ClickHouse/pull/3016) -* Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2975) -* Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942) -* Added the `retention` aggregate function. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2887) -* Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [#3034](https://github.com/ClickHouse/ClickHouse/pull/3034) -* Tables in the MergeTree family now have the virtual column `_partition_id`. [#3089](https://github.com/ClickHouse/ClickHouse/pull/3089) - -#### Experimental features: - -* Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary. [#2830](https://github.com/ClickHouse/ClickHouse/pull/2830) -* Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [#3077](https://github.com/ClickHouse/ClickHouse/pull/3077) - -#### Improvements: - -* Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag. -* Improved performance of `GROUP BY` with multiple aggregation fields when one of them is string and the others are fixed length. -* Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`. -* Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2977) [#2980](https://github.com/ClickHouse/ClickHouse/pull/2980) -* Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2955) -* Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [#2953](https://github.com/ClickHouse/ClickHouse/pull/2953) -* Fixed a performance problem in the case of a large stream of queries that result in an error (the ` _dl_addr` function is visible in `perf top`, but the server isn't using much CPU). [#2938](https://github.com/ClickHouse/ClickHouse/pull/2938) -* Conditions are cast into the View (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907) -* Improvements to the functionality for the `UUID` data type. [#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [#2985](https://github.com/ClickHouse/ClickHouse/pull/2985) -* The `UUID` data type is supported in The-Alchemist dictionaries. [#2822](https://github.com/ClickHouse/ClickHouse/pull/2822) -* The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2974) -* When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. 
[BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958) -* For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [#2920](https://github.com/ClickHouse/ClickHouse/pull/2920) -* The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971) -* You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m`. [#2840](https://github.com/ClickHouse/ClickHouse/pull/2840) -* You can use the `SELECT TOP n` syntax as an alternative for `LIMIT`. [#2840](https://github.com/ClickHouse/ClickHouse/pull/2840) -* Increased the size of the queue to write to system tables, so the `SystemLog parameter queue is full` error doesn't happen as often. -* The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2801) -* Duplicate columns can be used in a `USING` clause for `JOIN`. [#3006](https://github.com/ClickHouse/ClickHouse/pull/3006) -* `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [#3003](https://github.com/ClickHouse/ClickHouse/pull/3003) -* The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2885) -* Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/ClickHouse/ClickHouse/pull/2909) -* The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`. -* `ALTER DELETE` queries work for materialized views. -* Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there are a very large number of `ReplicatedMergeTree` tables. -* Support for `ATTACH TABLE ... ON CLUSTER` queries. [#3025](https://github.com/ClickHouse/ClickHouse/pull/3025) - -#### Bug fixes: - -* Fixed an issue with `Dictionary` tables (throws the `Size of offsets doesn't match size of column` or `Unknown compression method` exception). This bug appeared in version 18.10.3. [#2913](https://github.com/ClickHouse/ClickHouse/issues/2913) -* Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [#3049](https://github.com/ClickHouse/ClickHouse/pull/3049) -* Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [#3038](https://github.com/ClickHouse/ClickHouse/pull/3038) -* Fixed the possibility of data loss when inserting in `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [#2964](https://github.com/ClickHouse/ClickHouse/pull/2964) -* Fixed a segfault during `JOIN ... ON`. 
[#3000](https://github.com/ClickHouse/ClickHouse/pull/3000) -* Fixed the error searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [#2994](https://github.com/ClickHouse/ClickHouse/pull/2994) -* Fixed the "Not found column" error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [#3087](https://github.com/ClickHouse/ClickHouse/pull/3087) -* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [#3093](https://github.com/ClickHouse/ClickHouse/pull/3093) -* Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [#2910](https://github.com/ClickHouse/ClickHouse/pull/2910) -* Fixed the incorrect result when comparing `nan` with integers. [#3024](https://github.com/ClickHouse/ClickHouse/pull/3024) -* Fixed an error in the `zlib-ng` library that could lead to segfault in rare cases. [#2854](https://github.com/ClickHouse/ClickHouse/pull/2854) -* Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [#3084](https://github.com/ClickHouse/ClickHouse/pull/3084) -* Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously. -* Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [#2989](https://github.com/ClickHouse/ClickHouse/pull/2989) -* Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2960) -* Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [#2937](https://github.com/ClickHouse/ClickHouse/pull/2937) -* The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2929) -* Fixed the segfault when re-initializing the ZooKeeper session. [#2917](https://github.com/ClickHouse/ClickHouse/pull/2917) -* Fixed potential blocking when working with ZooKeeper. -* Fixed incorrect code for adding nested data structures in a `SummingMergeTree`. -* When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2808) - -#### Security fix: - -* Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. 
[#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [#2921](https://github.com/ClickHouse/ClickHouse/pull/2921) -* Fixed incorrect validation of the file path in the `catBoostPool` table function. [#2894](https://github.com/ClickHouse/ClickHouse/pull/2894) -* The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user's configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2856) - -#### Backward incompatible changes: - -* In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. - -#### Build changes: - -* Most integration tests can now be run by commit. -* Code style checks can also be run by commit. -* The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912) -* When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall-Wextra -Werror`. [#2957](https://github.com/ClickHouse/ClickHouse/pull/2957) -* Debugging the build uses the `jemalloc` debug option. -* The interface of the library for interacting with ZooKeeper is declared abstract. [#2950](https://github.com/ClickHouse/ClickHouse/pull/2950) - -## ClickHouse release 18.10 - -### ClickHouse release 18.10.3, 2018-08-13 - -#### New features: - -* HTTPS can be used for replication. [#2760](https://github.com/ClickHouse/ClickHouse/pull/2760) -* Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32`. [#2791](https://github.com/ClickHouse/ClickHouse/pull/2791) -* Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [#2834](https://github.com/ClickHouse/ClickHouse/pull/2834) -* Support for `UUID` in the key columns. - -#### Improvements: - -* Clusters can be removed without restarting the server when they are deleted from the config files. [#2777](https://github.com/ClickHouse/ClickHouse/pull/2777) -* External dictionaries can be removed without restarting the server when they are removed from config files. [#2779](https://github.com/ClickHouse/ClickHouse/pull/2779) -* Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781) -* Improvements for the `UUID` data type (not yet complete). [#2618](https://github.com/ClickHouse/ClickHouse/pull/2618) -* Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [#2815](https://github.com/ClickHouse/ClickHouse/pull/2815) -* Old records of completed mutations are deleted (`ALTER DELETE`). [#2784](https://github.com/ClickHouse/ClickHouse/pull/2784) -* Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841) -* The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. 
[Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851) -* Added the `max_partition_size_to_drop` config option. [#2782](https://github.com/ClickHouse/ClickHouse/pull/2782) -* Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812) -* Added the `max_fetch_partition_retries_count` setting. [#2831](https://github.com/ClickHouse/ClickHouse/pull/2831) -* Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [#2832](https://github.com/ClickHouse/ClickHouse/pull/2832) -* The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855) - -#### Bug fixes: - -* Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0. -* Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [#2814](https://github.com/ClickHouse/ClickHouse/pull/2814) -* Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [#2930](https://github.com/ClickHouse/ClickHouse/pull/2930) -* Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823) -* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a sub-query if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094) -* Fixed a memory leak if an exception occurred when connecting to a MySQL server. -* Fixed incorrect clickhouse-client response code in case of a query error. -* Fixed incorrect behavior of materialized views containing DISTINCT. [#2795](https://github.com/ClickHouse/ClickHouse/issues/2795) - -#### Backward incompatible changes - -* Removed support for CHECK TABLE queries for Distributed tables. - -#### Build changes: - -* The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [#2773](https://github.com/ClickHouse/ClickHouse/pull/2773) -* Use of libressl from a submodule. [#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [#2807](https://github.com/ClickHouse/ClickHouse/pull/2807) -* Use of unixodbc from a submodule. [#2789](https://github.com/ClickHouse/ClickHouse/pull/2789) -* Use of mariadb-connector-c from a submodule. [#2785](https://github.com/ClickHouse/ClickHouse/pull/2785) -* Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself). - -## ClickHouse release 18.6 - -### ClickHouse release 18.6.0, 2018-08-02 - -#### New features: - -* Added support for ON expressions for the JOIN ON syntax: -`JOIN ON Expr([table.]column ...) 
= Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]` -The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [#2742](https://github.com/ClickHouse/ClickHouse/pull/2742) -* HTTPS can be enabled for replication. [#2760](https://github.com/ClickHouse/ClickHouse/pull/2760) - -#### Improvements: - -* The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [#2646](https://github.com/ClickHouse/ClickHouse/pull/2646) - -## ClickHouse release 18.5 - -### ClickHouse release 18.5.1, 2018-07-31 - -#### New features: - -* Added the hash function `murmurHash2_32` [#2756](https://github.com/ClickHouse/ClickHouse/pull/2756). - -#### Improvements: - -* Now you can use the `from_env` [#2741](https://github.com/ClickHouse/ClickHouse/pull/2741) attribute to set values in config files from environment variables. -* Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf functions` [#2752](https://github.com/ClickHouse/ClickHouse/pull/2752). - -#### Bug fixes: - -* Fixed a possible bug when starting a replica [#2759](https://github.com/ClickHouse/ClickHouse/pull/2759). - -## ClickHouse release 18.4 - -### ClickHouse release 18.4.0, 2018-07-28 - -#### New features: - -* Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations` [#2721](https://github.com/ClickHouse/ClickHouse/pull/2721). -* Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster table function` [#2708](https://github.com/ClickHouse/ClickHouse/pull/2708). -* Support for `HTTP Basic` authentication in the replication protocol [#2727](https://github.com/ClickHouse/ClickHouse/pull/2727). -* The `has` function now allows searching for a numeric value in an array of `Enum` values [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699). -* Support for adding arbitrary message separators when reading from `Kafka` [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701). - -#### Improvements: - -* The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition [#2694](https://github.com/ClickHouse/ClickHouse/pull/2694). -* The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed. -* Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER` [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689). - -#### Bug fixes: - -* Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2)) -* Fixed a bug in the `windowFunnel aggregate function` [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735). 
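The `JOIN ... ON` grammar introduced in the 18.6.0 entry above is a chain of equalities combined with `AND`, where each side of an equality is an expression over the columns of one of the joined tables. A minimal sketch, assuming hypothetical tables `t1` and `t2`:

```sql
-- Each equality compares an expression over t1's columns with one over t2's columns;
-- qualified names such as t1.id and t2.ref_id are allowed.
SELECT t1.id, t2.value
FROM t1
ALL INNER JOIN t2
    ON t1.id = t2.ref_id AND toDate(t1.created_at) = t2.event_date
```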
-* Fixed a bug in the `anyHeavy` aggregate function ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee)) -* Fixed server crash when using the `countArray()` aggregate function. - -#### Backward incompatible changes: - -* Parameters for `Kafka` engine was changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add `kafka_row_delimiter` parameter with `''` value. - -## ClickHouse release 18.1 - -### ClickHouse release 18.1.0, 2018-07-23 - -#### New features: - -* Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)). -* Support for arbitrary types for the `uniq*` family of aggregate functions ([#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)). -* Support for arbitrary types in comparison operators ([#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)). -* The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)). -* Added the `arrayDistinct` function ([#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)). -* The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)). - -#### Improvements: - -* Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow timezone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog. -* Faster conversions of floating-point numbers to a string ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)). -* If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)). - -#### Bug fixes: - -* Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)). -* Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)). -* Fixed an error during a CAST to Nullable types ([#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)). -* Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)). -* Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)). 
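For the backward-incompatible `Kafka` engine change noted above, the new signature inserts `kafka_row_delimiter` before `kafka_schema`. A sketch of a migrated definition, with hypothetical broker, topic, group, schema, and column values:

```sql
-- Old: ENGINE = Kafka('broker:9092', 'events', 'group1', 'CapnProto', 'schema.capnp:Message', 2)
-- New: an empty kafka_row_delimiter ('') is added before kafka_schema.
CREATE TABLE kafka_events (payload String)
ENGINE = Kafka('broker:9092', 'events', 'group1', 'CapnProto', '', 'schema.capnp:Message', 2)
```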
-* Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)). -* Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't in uppercase letters ([fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)). -* Added missing quoting of identifiers for queries to an external DBMS ([#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)). - -#### Backward incompatible changes: - -* Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`. - -## ClickHouse release 1.1 - -### ClickHouse release 1.1.54394, 2018-07-12 - -#### New features: - -* Added the `histogram` aggregate function ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)). -* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)). - -#### Bug fixes: - -* Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388. -* Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table. -* The `has` function now works correctly for an array with Nullable elements ([#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)). -* The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table. -* Fixed how an empty `TinyLog` table works after inserting an empty data block ([#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)). -* The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL. - -### ClickHouse release 1.1.54390, 2018-07-06 - -#### New features: - -* Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Hvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)). -* Added the ability to enable or disable processing single or double quotes when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)). -* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)). - -#### Improvements: - -* Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used ([#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)). -* Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2. 
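One practical consequence of the `toDateTime('0')` change described in the 18.1.0 notes above: a zero `DateTime` default now has to be written out in full. A minimal sketch with a hypothetical table name:

```sql
CREATE TABLE t_example
(
    d DateTime DEFAULT '0000-00-00 00:00:00'  -- DEFAULT '0' no longer works
)
ENGINE = MergeTree ORDER BY tuple()
```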
-* Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)). -* Added `Nullable` support for the `runningDifference` function ([#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)). -* Improved query analysis performance when there is a very large number of expressions ([#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)). -* Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)). -* The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)). - -#### Bug fixes: - -* Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and readonly states of tables before restarting the server. -* Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted. -* Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)). -* Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)). -* Fixed segfault if `macros` are used but they aren't in the config file ([#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)). -* Fixed switching to the default database when reconnecting the client ([#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)). -* Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled. - -#### Security fix: - -* Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`). - -### ClickHouse release 1.1.54388, 2018-06-28 - -#### New features: - -* Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track progress of this type of queries. -* Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables. -* Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260)) -* Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`). -* Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)). -* Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)). -* Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)). -* New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)). -* The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)). -* The password to `clickhouse-client` can be entered interactively. 
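A sketch of the replicated `ALTER TABLE ... DELETE WHERE` flow from the 1.1.54388 notes above. The table name is hypothetical, and the `system.mutations` column names used here (`mutation_id`, `parts_to_do`, `is_done`) are assumptions about that release:

```sql
-- The DELETE runs asynchronously as a mutation on all replicas.
ALTER TABLE db.events DELETE WHERE event_date < '2018-01-01';

-- Follow its progress in the new system.mutations table.
SELECT mutation_id, command, parts_to_do, is_done
FROM system.mutations
WHERE database = 'db' AND table = 'events';
```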
-* Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)). -* Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)). -* Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263)) -* Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats. -* Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests. - -#### Experimental features: - -* Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272)) -* JIT compilation to native code is now available for some expressions ([pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)). - -#### Bug fixes: - -* Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`. -* Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result. -* Fixed an error when reading an array column from a Nested structure ([#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)). -* Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`. -* Fixed an error when analyzing queries with recursive aliases. -* Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)). -* User profile settings were not applied when using sessions in the HTTP interface. -* Fixed how settings are applied from the command line parameters in clickhouse-local. -* The ZooKeeper client library now uses the session timeout received from the server. -* Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout. -* Fixed pruning of parts for queries with conditions on partition key columns ([#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)). -* Merges are now possible after `CLEAR COLUMN IN PARTITION` ([#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)). -* Type mapping in the ODBC table function has been fixed ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)). -* Type comparisons have been fixed for `DateTime` with and without the time zone ([Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)). -* Fixed syntactic parsing and formatting of the `CAST` operator. -* Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)). -* Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)). -* Fixed SSRF in the remote() table function. -* Fixed exit behavior of `clickhouse-client` in multiline mode ([#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)). - -#### Improvements: - -* Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)). -* Improved LZ4 compression performance. -* Faster analysis for queries with a large number of JOINs and sub-queries. -* The DNS cache is now updated automatically when there are too many network errors. 
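The `date_time_input_format` setting mentioned above relaxes `DateTime` parsing in text input formats. A small sketch, assuming a hypothetical table `t_events` with a `DateTime` column `d`:

```sql
SET date_time_input_format = 'best_effort';

-- With best-effort parsing, ISO 8601 and similar spellings are accepted where
-- previously the fixed 'YYYY-MM-DD hh:mm:ss' form (or a Unix timestamp) was expected.
INSERT INTO t_events (d) VALUES ('2018-06-28T10:20:30Z');
```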
-* Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts. -* Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`. -* Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match. -* A server with replicated tables can start even if you haven't configured ZooKeeper. -* When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)). -* Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)). - -#### Build changes: - -* The gcc8 compiler can be used for builds. -* Added the ability to build llvm from a submodule. -* The version of the librdkafka library has been updated to v0.11.4. -* Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0. -* Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)). -* CMake now generates files for ninja by default (like when using `-G Ninja`). -* Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)). -* Fixed a header file conflict in Fedora Rawhide ([#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)). - -#### Backward incompatible changes: - -* Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format. -* If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: Update the server on the entire cluster. - -### ClickHouse release 1.1.54385, 2018-06-01 - -#### Bug fixes: - -* Fixed an error that in some cases caused ZooKeeper operations to block. - -### ClickHouse release 1.1.54383, 2018-05-22 - -#### Bug fixes: - -* Fixed a slowdown of the replication queue if a table has many replicas. - -### ClickHouse release 1.1.54381, 2018-05-14 - -#### Bug fixes: - -* Fixed a leak of nodes in ZooKeeper when ClickHouse loses the connection to the ZooKeeper server. - -### ClickHouse release 1.1.54380, 2018-04-21 - -#### New features: - -* Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random`, then `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`. - -#### Improvements: - -* Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`. -* Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit. - -#### Bug fixes: - -* Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`. -* Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`. -* Fixed inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table. -* Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica. -* Fixed freezing of `KILL QUERY`.
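Spelled out, the `file()` example from the 1.1.54380 entry above is two steps: a symlink under the server's `user_files` directory, then a query against it:

```sql
-- After creating the symlink on the server host:
--   ln -s /dev/urandom /var/lib/clickhouse/user_files/random
SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10
```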
-* Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration. - -#### Backward incompatible changes: - -* Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors. - -### ClickHouse release 1.1.54378, 2018-04-16 - -#### New features: - -* Logging level can be changed without restarting the server. -* Added the `SHOW CREATE DATABASE` query. -* The `query_id` can be passed to `clickhouse-client` (elBroom). -* New setting: `max_network_bandwidth_for_all_users`. -* Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`. -* Added information about the size of data parts in uncompressed form in the system table. -* Server-to-server encryption support for distributed tables (`1` in the replica config in ``). -* Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`. -* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed. It's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). -* Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). -* When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result. - -#### Improvements: - -* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue. -* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. -* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). -* The `lengthUTF8` function runs faster (zhang2014). -* Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. -* The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). -* More robust crash recovery for asynchronous insertion into `Distributed` tables. -* The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊). - -#### Bug fixes: - -* Fixed an error with `IN` when the left side of the expression is `Nullable`. -* Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index. -* The `max_execution_time` limit now works correctly with distributed queries. -* Fixed errors when calculating the size of composite columns in the `system.columns` table. -* Fixed an error when creating a temporary table with `CREATE TEMPORARY TABLE IF NOT EXISTS`. -* Fixed errors in `StorageKafka` (#2075). -* Fixed server crashes from invalid arguments of certain aggregate functions. -* Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -* The `Too many parts` state is less likely to happen when inserting into aggregated materialized views (#2084).
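A sketch of the synchronous `Distributed` insert mode (`insert_distributed_sync = 1`) whose performance the 1.1.54378 notes above mention; table names are hypothetical:

```sql
-- The INSERT returns only after the data has been written to the remote shards,
-- instead of being queued for asynchronous delivery.
SET insert_distributed_sync = 1;
INSERT INTO dist_events SELECT * FROM local_events;
```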
-* Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. -* Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. -* `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. -* Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables. - -#### Build changes: - -* The build supports `ninja` instead of `make` and uses `ninja` by default for building releases. -* Renamed packages: `clickhouse-server-base` to `clickhouse-common-static`; `clickhouse-server-common` to `clickhouse-server`; `clickhouse-common-dbg` to `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. - -#### Backward incompatible changes: - -* Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`. -* Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `1` in the config. - -### ClickHouse release 1.1.54370, 2018-03-16 - -#### New features: - -* Added the `system.macros` table and auto updating of macros when the config file is changed. -* Added the `SYSTEM RELOAD CONFIG` query. -* Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval. ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)). - -#### Improvements: - -* When inserting data in a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log). -* Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`. - -#### Bug fixes: - -* Fixed the `Illegal PREWHERE` error when reading from Merge tables for `Distributed` tables. -* Added fixes that allow you to start clickhouse-server in IPv4-only Docker containers. -* Fixed a race condition when reading from the `system.parts_columns` table. -* Removed double buffering during a synchronous insert to a `Distributed` table, which could have caused the connection to time out. -* Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query. -* Fixed incorrect dates in the `system.parts` table. -* Fixed a bug that made it impossible to insert data in a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster. -* Fixed the vertical merging algorithm for an empty `ORDER BY` table. -* Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server.
This functionality was lost in release 1.1.54362. -* Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358. -* Removed extraneous error-level logging of `Not found column ... in block`. - -### ClickHouse release 1.1.54362, 2018-03-11 - -#### New features: - -* Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1. -* Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard. -* Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`. -* An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova). -* Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta). -* Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings. -* Added functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`. -* Added the `arrayCumSum` function (Javi Santana). -* Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read a DateTime from a string containing text in a wide variety of possible formats. -* Data can be partially reloaded from external dictionaries during updating (load just the records in which the value of the specified field is greater than in the previous download) (Arsen Hakobyan). -* Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier. -* The `remote` and `cluster` table functions can be used in `INSERT` queries. -* Added the `create_table_query` and `engine_full` virtual columns to the `system.tables` table. The `metadata_modification_time` column is virtual. -* Added the `data_path` and `metadata_path` columns to the `system.tables` and `system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables. -* Added additional information about merges in the `system.part_log` table. -* An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov). -* The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014). -* Added `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014). -* Support for `SHOW CREATE TABLE` for temporary tables (zhang2014). -* Added the `system_profile` configuration parameter for the settings used by internal processes. -* Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko).
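A sketch of the `cluster` table function added in 1.1.54362 above; the cluster, database, and table names are hypothetical, and the three-argument form follows the example given in the entry:

```sql
-- Reads default.hits from every shard of the cluster defined in remote_servers.
SELECT count() FROM cluster('my_cluster', default, hits)
```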
-* Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko). -* Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes. -* Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table. -* Configuration settings can be overridden in the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`. -* Implemented the `empty` function from a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014). -* Added the `listen_try`configuration parameter for listening to at least one of the listen addresses without quitting, if some of the addresses can't be listened to (useful for systems with disabled support for IPv4 or IPv6). -* Added the `VersionedCollapsingMergeTree` table engine. -* Support for rows and arbitrary numeric types for the `library` dictionary source. -* `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`). -* A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`. -* `RENAME TABLE` can be performed for `VIEW`. -* Added the `throwIf` function. -* Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024). -* The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns. - -#### Improvements: - -* Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries. -* Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts. -* Added the `allow_distributed_ddl` option. -* Nondeterministic functions are not allowed in expressions for `MergeTree` table keys. -* Files with substitutions from `config.d` directories are loaded in alphabetical order. -* Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`. -* The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks). -* When running a query, table valued functions run once. Previously, `remote` and `mysql` table valued functions performed the same query twice to retrieve the table structure from a remote server. -* The `MkDocs` documentation generator is used. -* When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014). -* Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342. -* `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, it uses the corresponding numeric values. This feature was previously available but was lost in the release 1.1.54337. -* Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases. - -#### Bug fixes: - -* Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`. -* Fixed a bug in merges for `ReplacingMergeTree` tables. 
-* Fixed synchronous insertions in `Distributed` tables (`insert_distributed_sync = 1`). -* Fixed segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries. -* Fixed segfault for certain uses of `replace_running_query` and `KILL QUERY`. -* Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table. -* Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata. -* Fixed the `DROP DATABASE` query for `Dictionary` databases. -* Fixed the low precision of `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alex Bocharov). -* Fixed the calculation of implicit default values when necessary to simultaneously calculate default explicit expressions in `INSERT` queries (zhang2014). -* Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc). -* Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc). -* Fixed a slight performance regression with functions that use regular expressions. -* Fixed a performance regression when creating multidimensional arrays from complex expressions. -* Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata. -* Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table. -* Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand). -* Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`. -* Fixed a bug when filtering tables by virtual `_table` columns in queries to `Merge` tables. -* Fixed a bug when using `ALIAS` columns in `Distributed` tables. -* Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family. -* Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries. -* Fixed a crash when passing arrays of different sizes to an `arrayReduce` function when using aggregate functions from multiple arguments. -* Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`. -* Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled). - -#### Backward incompatible changes: - -* Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default. -* Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`. -* Removed the `UnsortedMergeTree` engine. - -### Clickhouse Release 1.1.54343, 2018-02-05 - -* Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`. -* Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index. -* Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue. 
- -### Clickhouse Release 1.1.54342, 2018-01-22 - -This release contains bug fixes for the previous release 1.1.54337: - -* Fixed a regression in 1.1.54337: if the default user has readonly access, then the server refuses to start up with the message `Cannot create database in readonly mode`. -* Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d. -* Fixed a regression in 1.1.54337: wrong default configuration in the Docker image. -* Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`). -* Fixed a bug that may lead to inconsistent merges after OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`). -* Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014). -* Fixed a bug in implementation of NULL. - -### Clickhouse Release 1.1.54337, 2018-01-18 - -#### New features: - -* Added support for storage of multi-dimensional arrays and tuples (`Tuple` data type) in tables. -* Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`. -* Improved support for time zones. The `DateTime` data type can be annotated with the timezone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When timezones are specified in functions for `DateTime` arguments, the return type will track the timezone, and the value will be displayed as expected. -* Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive. -* Added the `toStartOfFifteenMinutes` function (Kirill Shvakov). -* Added the `clickhouse format` tool for formatting queries. -* Added the `format_schema_path` configuration parameter (Marek Vavruşa). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory. -* Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin). -* Added a column with documentation for the `system.settings` table (Kirill Shvakov). -* Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables. -* Added the `system.models` table with information about loaded `CatBoost` machine learning models. -* Added the `mysql` and `odbc` table function and corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage. -* Added the possibility to pass an argument of type `AggregateFunction` for the `groupArray` aggregate function (so you can create an array of states of some aggregate function). -* Removed restrictions on various combinations of aggregate function combinators. For example, you can use `avgForEachIf` as well as `avgIfForEach` aggregate functions, which have different behaviors. -* The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments. 
-* Added support for aggregate functions of `Nullable` arguments even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Example: `groupArray`, `groupUniqArray`, `topK`. -* Added the `max_client_network_bandwidth` for `clickhouse-client` (Kirill Shvakov). -* Users with the ` readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT...) (Kirill Shvakov). -* Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša). -* Added the `intExp3` and `intExp4` functions. -* Added the `sumKahan` aggregate function. -* Added the to * Number* OrNull functions, where * Number* is a numeric type. -* Added support for `WITH` clauses for an `INSERT SELECT` query (author: zhang2014). -* Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded. -* Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova). -* The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory. -* Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info in stderr. -* Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird). - -#### Performance optimizations: - -* Improved performance of aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` from string arguments. -* Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`. -* Improved performance of parsing and formatting `Date` and `DateTime` type values in text format. -* Improved performance and precision of parsing floating point numbers. -* Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING` . -* Improved performance of aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr` by reducing computational stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`. - -#### Bug fixes: - -* Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again was not working because inserted blocks were considered duplicates. -* Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for ` CREATE MATERIALIZED VIEW` queries with `POPULATE` . -* Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration. -* Fixed unexpected results of passing the `Date` argument to `toStartOfDay` . -* Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for ` INTERVAL n MONTH` in cases when the result has the previous year. -* Added missing support for the `UUID` data type for `DISTINCT` , `JOIN` , and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete. -* Fixed `SummingMergeTree` behavior in cases when the rows summed to zero. -* Various fixes for the `Kafka` engine (Marek Vavruša). 
-* Fixed incorrect behavior of the `Join` table engine (Amos Bird). -* Fixed incorrect allocator behavior under FreeBSD and OS X. -* The `extractAll` function now supports empty matches. -* Fixed an error that blocked usage of `libressl` instead of `openssl` . -* Fixed the ` CREATE TABLE AS SELECT` query from temporary tables. -* Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts. -* Fixed possible overflow in `gcd` , `lcm` and `modulo` (`%` operator) (Maks Skorokhod). -* `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config). -* Fixed a bug in the background check of parts (`MergeTreePartChecker` ) when using a custom partition key. -* Fixed parsing of tuples (values of the `Tuple` data type) in text formats. -* Improved error messages about incompatible types passed to `multiIf` , `array` and some other functions. -* Redesigned support for `Nullable` types. Fixed bugs that may lead to a server crash. Fixed almost all other bugs related to ` NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, `join_use_nulls` mode, Nullable types as arguments of `OR` operator, etc. -* Fixed various bugs related to internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree` ; alignment of `Enum` types in `Pretty` formats, etc. -* Stricter checks for allowed combinations of composite columns. -* Fixed the overflow when specifying a very large parameter for the `FixedString` data type. -* Fixed a bug in the `topK` aggregate function in a generic case. -* Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with an `-Array` combinator. -* Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322). -* Fixed the precision of the `exp10` function. -* Fixed the behavior of the `visitParamExtract` function for better compliance with documentation. -* Fixed the crash when incorrect data types are specified. -* Fixed the behavior of `DISTINCT` in the case when all columns are constants. -* Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index. -* Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries. -* Fixed a bug that leads to excessive rows in the result of `FULL` and ` RIGHT JOIN` (Amos Bird). -* Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload. -* Fixed the ` SYSTEM DROP DNS CACHE` query: the cache was flushed but addresses of cluster nodes were not updated. -* Fixed the behavior of ` MATERIALIZED VIEW` after executing ` DETACH TABLE` for the table under the view (Marek Vavruša). - -#### Build improvements: - -* The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment. -* A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems. -* Added the `clickhouse-test` package. It can be used to run functional tests. -* The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub. -* Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run. 
-* Added support for `Cap'n'Proto` in the default build. -* Changed the format of documentation sources from `Restricted Text` to `Markdown`. -* Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually. -* For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as ` clickhouse clang` and ` clickhouse lld` . -* Removed usage of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang` the default is `libc++` instead of `libstdc++`. -* Extracted `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools. - -#### Backward incompatible changes: - -* The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything. -* Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default. -* The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstvalue` to avoid confusion. -* Removed the ` FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird). -* Removed the `BlockTabSeparated` format that was used solely for demonstration purposes. -* Changed the state format for aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com. -* In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you can still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost. Although it was undocumented, we plan to support it again in future releases. -* Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release. - -#### Please note when upgrading: - -* When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message ` unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated. -* If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes. - -### ClickHouse release 1.1.54327, 2017-12-21 - -This release contains bug fixes for the previous release 1.1.54318: - -* Fixed bug with possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. 
If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in logs in Warning messages like ` Part ... from own log doesn't exist.` The issue is relevant even if you don't see these messages in logs. - -### ClickHouse release 1.1.54318, 2017-11-30 - -This release contains bug fixes for the previous release 1.1.54310: - -* Fixed incorrect row deletions during merges in the SummingMergeTree engine -* Fixed a memory leak in unreplicated MergeTree engines -* Fixed performance degradation with frequent inserts in MergeTree engines -* Fixed an issue that was causing the replication queue to stop running -* Fixed rotation and archiving of server logs - -### ClickHouse release 1.1.54310, 2017-11-01 - -#### New features: - -* Custom partitioning key for the MergeTree family of table engines. -* [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine. -* Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. -* Added support for time zones with non-integer offsets from UTC. -* Added support for arithmetic operations with time intervals. -* The range of values for the Date and DateTime types is extended to the year 2105. -* Added the ` CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view). -* Added the `ATTACH TABLE` query without arguments. -* The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted to the sumMap aggregate function. You can now specify such columns explicitly. -* Max size of the IP trie dictionary is increased to 128M entries. -* Added the getSizeOfEnumType function. -* Added the sumWithOverflow aggregate function. -* Added support for the Cap'n Proto input format. -* You can now customize compression level when using the zstd algorithm. - -#### Backward incompatible changes: - -* Creation of temporary tables with an engine other than Memory is not allowed. -* Explicit creation of tables with the View or MaterializedView engine is not allowed. -* During table creation, a new check verifies that the sampling key expression is included in the primary key. - -#### Bug fixes: - -* Fixed hangups when synchronously inserting into a Distributed table. -* Fixed nonatomic adding and removing of parts in Replicated tables. -* Data inserted into a materialized view is not subjected to unnecessary deduplication. -* Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore. -* Users don't need access permissions to the `default` database to create temporary tables anymore. -* Fixed crashing when specifying the Array type without arguments. -* Fixed hangups when the disk volume containing server logs is full. -* Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch. - -#### Build improvements: - -* Several third-party libraries (notably Poco) were updated and converted to git submodules. - -### ClickHouse release 1.1.54304, 2017-10-19 - -#### New features: - -* TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml` ). - -#### Bug fixes: - -* `ALTER` for replicated tables now tries to start running as soon as possible. 
-* Fixed crashing when reading data with the setting `preferred_block_size_bytes=0.` -* Fixed crashes of `clickhouse-client` when pressing ` Page Down` -* Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL` -* `FREEZE PARTITION` always works atomically now. -* Empty POST requests now return a response with code 411. -* Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8)).` -* Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables. -* Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b` -* Users are updated correctly with invalid `users.xml` -* Correct handling when an executable dictionary returns a non-zero response code. - -### ClickHouse release 1.1.54292, 2017-09-20 - -#### New features: - -* Added the `pointInPolygon` function for working with coordinates on a coordinate plane. -* Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`. -* Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers. -* The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting ` compile = 1` , which is not used by default). -* Reduced the time needed for dynamic compilation of queries. - -#### Bug fixes: - -* Fixed an error that sometimes produced ` part ... intersects previous part` messages and weakened replica consistency. -* Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown. -* Removed excessive logging when restoring replicas. -* Fixed an error in the UNION ALL implementation. -* Fixed an error in the concat function that occurred if the first column in a block has the Array type. -* Progress is now displayed correctly in the system.merges table. - -### ClickHouse release 1.1.54289, 2017-09-13 - -#### New features: - -* `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`. -* Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`. -* Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster. -* Added aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`). -* External dictionaries can be loaded from MySQL by specifying a socket in the filesystem. -* External dictionaries can be loaded from MySQL over SSL (`ssl_cert`, `ssl_key`, `ssl_ca` parameters). -* Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user. -* Support for `DROP TABLE` for temporary tables. -* Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats. -* Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes). -* FIFO locking is used during ALTER: an ALTER query isn't blocked indefinitely for continuously running queries. -* Option to set `umask` in the config file. 
-* Improved performance for queries with `DISTINCT` . - -#### Bug fixes: - -* Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn't get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things. -* Fixed randomization when choosing hosts for the connection to ZooKeeper. -* Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost. -* Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running ` ALTER MODIFY` on an element in a `Nested` structure. -* Fixed an error that could cause SELECT queries to "hang". -* Improvements to distributed DDL queries. -* Fixed the query `CREATE TABLE ... AS `. -* Resolved the deadlock in the ` ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables. -* Fixed the invalid default value for `Enum` s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats. -* Resolved the appearance of zombie processes when using a dictionary with an `executable` source. -* Fixed segfault for the HEAD query. - -#### Improved workflow for developing and assembling ClickHouse: - -* You can use `pbuilder` to build ClickHouse. -* You can use `libc++` instead of `libstdc++` for builds on Linux. -* Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`. - -#### Please note when upgrading: - -* There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT queries will fail with the message "Merges are processing significantly slower than inserts." Use the ` SELECT * FROM system.merges` query to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don't need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the section in config.xml, set ```107374182400` and restart the server. - -### ClickHouse release 1.1.54284, 2017-08-29 - -* This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper. - -### ClickHouse release 1.1.54282, 2017-08-23 - -This release contains bug fixes for the previous release 1.1.54276: - -* Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table. -* Fixed parsing when inserting in RowBinary format if input data starts with';'. -* Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`). - -### Clickhouse Release 1.1.54276, 2017-08-16 - -#### New features: - -* Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a` -* INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert_distributed_sync=1. 
-* Added the UUID data type for working with 16-byte identifiers. -* Added aliases of CHAR, FLOAT and other types for compatibility with the Tableau. -* Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers. -* You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries. -* Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len).` -* Added the max_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance. - -#### Main changes: - -* Security improvements: all server files are created with 0640 permissions (can be changed via config parameter). -* Improved error messages for queries with invalid syntax. -* Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data. -* Significantly increased the performance of data merges for the ReplacingMergeTree engine. -* Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed_directory_monitor_batch_inserts=1. - -#### Backward incompatible changes: - -* Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays. - -#### Complete list of changes: - -* Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format. -* Optimized stream allocation when reading from a Distributed table. -* Settings can be configured in readonly mode if the value doesn't change. -* Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred_block_size_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns. -* Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op сonstexpr.` -* Added new settings for MergeTree engines (the merge_tree section in config.xml): - - replicated_deduplication_window_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables. - - cleanup_delay_period sets how often to start cleanup to remove outdated data. - - replicated_can_become_leader can prevent a replica from becoming the leader (and assigning merges). -* Accelerated cleanup to remove outdated data from ZooKeeper. -* Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed_ddl_task_timeout, which limits the time to wait for a response from the servers in the cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. -* Improved display of stack traces in the server logs. -* Added the "none" value for the compression method. -* You can use multiple dictionaries_config sections in config.xml. -* It is possible to connect to MySQL through a socket in the file system. -* The system.parts table has a new column with information about the size of marks, in bytes. - -#### Bug fixes: - -* Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field. -* Fixed a rare race condition in ReplicatedMergeTree when checking data parts. -* Fixed possible freezing on "leader election" when starting a server. 
-* The max_replica_delay_for_distributed_queries setting was ignored when using a local replica of the data source. This has been fixed. -* Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column. -* Fixed an exception in the multiIf function when using empty arrays or strings. -* Fixed excessive memory allocations when deserializing Native format. -* Fixed incorrect auto-update of Trie dictionaries. -* Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE. -* Fixed a crash of GROUP BY when using distributed_aggregation_memory_efficient=1. -* Now you can specify the database.table in the right side of IN and JOIN. -* Too many threads were used for parallel aggregation. This has been fixed. -* Fixed how the "if" function works with FixedString arguments. -* SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed. -* Running `CREATE VIEW IF EXISTS no longer causes crashes.` -* Fixed incorrect behavior when input_format_skip_unknown_fields=1 is set and there are negative numbers. -* Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary. -* Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables. -* Fixed an incorrect interpretation of a SELECT query from Dictionary tables. -* Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements. -* Fixed the failover for dictionaries with MySQL as the source. - -#### Improved workflow for developing and assembling ClickHouse: - -* Builds can be assembled in Arcadia. -* You can use gcc 7 to compile ClickHouse. -* Parallel builds using ccache+distcc are faster now. - -### ClickHouse release 1.1.54245, 2017-07-04 - -#### New features: - -* Distributed DDL (for example, `CREATE TABLE ON CLUSTER`) -* The replicated query `ALTER TABLE CLEAR COLUMN IN PARTITION.` -* The engine for Dictionary tables (access to dictionary data in the form of a table). -* Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries). -* You can check for updates to the dictionary by sending a request to the source. -* Qualified column names -* Quoting identifiers using double quotation marks. -* Sessions in the HTTP interface. -* The OPTIMIZE query for a Replicated table can can run not only on the leader. - -#### Backward incompatible changes: - -* Removed SET GLOBAL. - -#### Minor changes: - -* Now after an alert is triggered, the log prints the full stack trace. -* Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives). - -#### Bug fixes: - -* Fixed a bad connection "sticking" when inserting into a Distributed table. -* GLOBAL IN now works for a query from a Merge table that looks at a Distributed table. -* The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed. -* Changes in how an executable source of cached external dictionaries works. -* Fixed the comparison of strings containing null characters. -* Fixed the comparison of Float32 primary key fields with constants. -* Previously, an incorrect estimate of the size of a field could lead to overly large allocations. -* Fixed a crash when querying a Nullable column added to a table using ALTER. 
-* Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT. -* Fixed an ORDER BY subquery consisting of only constant values. -* Previously, a Replicated table could remain in the invalid state after a failed DROP TABLE. -* Aliases for scalar subqueries with empty results are no longer lost. -* Now a query that used compilation does not fail with an error if the .so file gets damaged. From 83b4efeeec372ecd514e1fd17fda9c02eb265623 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 17 Mar 2020 21:00:57 +0300 Subject: [PATCH 038/115] Update Dockerfile --- docker/builder/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile index fbb5396365a..c9e37dc75e5 100644 --- a/docker/builder/Dockerfile +++ b/docker/builder/Dockerfile @@ -22,7 +22,8 @@ RUN apt-get update -y \ python-termcolor \ sudo \ tzdata \ - gperf + gperf \ + libcctz-dev RUN apt install -y wget RUN printf "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main\ndeb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list \ From 71697a4afc228bd207e9bae1e5bc7d2f50150905 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Tue, 17 Mar 2020 15:10:23 -0300 Subject: [PATCH 039/115] =?UTF-8?q?Doc.=20ontime.md=20refresh=20to=20?= =?UTF-8?q?=D1=81orrespond=20modern=20CH=20(modern=20create/join)=20(#9709?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ontime.md refresh to сorrespond modern CH * RU ontime.md refresh to сorrespond modern CH * ontime.md FA refresh to сorrespond modern CH * ontime.md ZH refresh to сorrespond modern CH --- .../getting_started/example_datasets/ontime.md | 17 ++++++++++------- .../getting_started/example_datasets/ontime.md | 17 ++++++++++------- .../getting_started/example_datasets/ontime.md | 17 ++++++++++------- .../getting_started/example_datasets/ontime.md | 17 ++++++++++------- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/docs/en/getting_started/example_datasets/ontime.md b/docs/en/getting_started/example_datasets/ontime.md index 4aea3985aab..e29305bcef8 100644 --- a/docs/en/getting_started/example_datasets/ontime.md +++ b/docs/en/getting_started/example_datasets/ontime.md @@ -135,7 +135,10 @@ CREATE TABLE `ontime` ( `Div5LongestGTime` String, `Div5WheelsOff` String, `Div5TailNum` String -) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192) +) ENGINE = MergeTree +PARTITION BY Year +ORDER BY (Carrier, FlightDate) +SETTINGS index_granularity = 8192; ``` Loading data: @@ -227,7 +230,7 @@ FROM AND Year=2007 GROUP BY Carrier ) -ANY INNER JOIN +JOIN ( SELECT Carrier, @@ -246,7 +249,7 @@ SELECT Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier -ORDER BY Carrier +ORDER BY c3 DESC ``` Q6. The previous request for a broader range of years, 2000-2008 @@ -263,7 +266,7 @@ FROM AND Year>=2000 AND Year<=2008 GROUP BY Carrier ) -ANY INNER JOIN +JOIN ( SELECT Carrier, @@ -282,7 +285,7 @@ SELECT Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -ORDER BY Carrier; +ORDER BY c3 DESC; ``` Q7. 
Percentage of flights delayed for more than 10 minutes, by year @@ -298,7 +301,7 @@ FROM WHERE DepDelay>10 GROUP BY Year ) -ANY INNER JOIN +JOIN ( select Year, @@ -312,7 +315,7 @@ ORDER BY Year; Better version of the same query: ```sql -SELECT Year, avg(DepDelay>10) +SELECT Year, avg(DepDelay>10)*100 FROM ontime GROUP BY Year ORDER BY Year; diff --git a/docs/fa/getting_started/example_datasets/ontime.md b/docs/fa/getting_started/example_datasets/ontime.md index e07ce8572e2..89145bfb1af 100644 --- a/docs/fa/getting_started/example_datasets/ontime.md +++ b/docs/fa/getting_started/example_datasets/ontime.md @@ -136,7 +136,10 @@ CREATE TABLE `ontime` ( `Div5LongestGTime` String, `Div5WheelsOff` String, `Div5TailNum` String -) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192) +) ENGINE = MergeTree +PARTITION BY Year +ORDER BY (Carrier, FlightDate) +SETTINGS index_granularity = 8192; ```
@@ -218,7 +221,7 @@ FROM
         AND Year=2007
     GROUP BY Carrier
 )
-ANY INNER JOIN
+JOIN
 (
     SELECT
         Carrier,
@@ -237,7 +240,7 @@ ORDER BY c3 DESC;
 ``` sql
-SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier
+SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY c3 DESC
 ```
@@ -258,7 +261,7 @@ FROM
         AND Year >= 2000 AND Year <= 2008
     GROUP BY Carrier
 )
-ANY INNER JOIN
+JOIN
 (
     SELECT
         Carrier,
@@ -277,7 +280,7 @@ ORDER BY c3 DESC;
 ``` sql
-SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY Carrier ORDER BY Carrier
+SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY Carrier ORDER BY c3 DESC
 ```
@@ -297,7 +300,7 @@ FROM
     WHERE DepDelay>10
     GROUP BY Year
 )
-ANY INNER JOIN
+JOIN
 (
     select
         Year,
@@ -315,7 +318,7 @@ ORDER BY Year
 ``` sql
-SELECT Year, avg(DepDelay > 10) FROM ontime GROUP BY Year ORDER BY Year
+SELECT Year, avg(DepDelay > 10)*100 FROM ontime GROUP BY Year ORDER BY Year
 ```
diff --git a/docs/ru/getting_started/example_datasets/ontime.md b/docs/ru/getting_started/example_datasets/ontime.md index ec0c6f8f5ab..2d7a8500bd3 100644 --- a/docs/ru/getting_started/example_datasets/ontime.md +++ b/docs/ru/getting_started/example_datasets/ontime.md @@ -132,7 +132,10 @@ CREATE TABLE `ontime` ( `Div5LongestGTime` String, `Div5WheelsOff` String, `Div5TailNum` String -) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192) +) ENGINE = MergeTree +PARTITION BY Year +ORDER BY (Carrier, FlightDate) +SETTINGS index_granularity = 8192; ``` Загрузка данных: @@ -224,7 +227,7 @@ FROM AND Year=2007 GROUP BY Carrier ) -ANY INNER JOIN +JOIN ( SELECT Carrier, @@ -243,7 +246,7 @@ SELECT Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier -ORDER BY Carrier +ORDER BY c3 DESC ``` Q6. Предыдущий запрос за более широкий диапазон лет, 2000-2008 @@ -260,7 +263,7 @@ FROM AND Year>=2000 AND Year<=2008 GROUP BY Carrier ) -ANY INNER JOIN +JOIN ( SELECT Carrier, @@ -279,7 +282,7 @@ SELECT Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -ORDER BY Carrier; +ORDER BY c3 DESC; ``` Q7. Процент полетов, задержанных на более 10 минут, в разбивке по годам @@ -295,7 +298,7 @@ FROM WHERE DepDelay>10 GROUP BY Year ) -ANY INNER JOIN +JOIN ( select Year, @@ -309,7 +312,7 @@ ORDER BY Year; Более оптимальная версия того же запроса: ```sql -SELECT Year, avg(DepDelay>10) +SELECT Year, avg(DepDelay>10)*100 FROM ontime GROUP BY Year ORDER BY Year; diff --git a/docs/zh/getting_started/example_datasets/ontime.md b/docs/zh/getting_started/example_datasets/ontime.md index ec4053490a5..11994bfa97b 100644 --- a/docs/zh/getting_started/example_datasets/ontime.md +++ b/docs/zh/getting_started/example_datasets/ontime.md @@ -135,7 +135,10 @@ CREATE TABLE `ontime` ( `Div5LongestGTime` String, `Div5WheelsOff` String, `Div5TailNum` String -) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192) +) ENGINE = MergeTree +PARTITION BY Year +ORDER BY (Carrier, FlightDate) +SETTINGS index_granularity = 8192; ``` 加载数据: @@ -226,7 +229,7 @@ FROM AND Year=2007 GROUP BY Carrier ) -ANY INNER JOIN +JOIN ( SELECT Carrier, @@ -245,7 +248,7 @@ SELECT Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier -ORDER BY Carrier +ORDER BY c3 DESC ``` Q6. 同上一个查询一致,只是查询范围扩大到2000年到2008年 @@ -262,7 +265,7 @@ FROM AND Year>=2000 AND Year<=2008 GROUP BY Carrier ) -ANY INNER JOIN +JOIN ( SELECT Carrier, @@ -281,7 +284,7 @@ SELECT Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -ORDER BY Carrier; +ORDER BY c3 DESC; ``` Q7. 每年航班延误超过10分钟的百分比 @@ -297,7 +300,7 @@ FROM WHERE DepDelay>10 GROUP BY Year ) -ANY INNER JOIN +JOIN ( select Year, @@ -311,7 +314,7 @@ ORDER BY Year; 更好的查询版本: ```sql -SELECT Year, avg(DepDelay>10) +SELECT Year, avg(DepDelay>10)*100 FROM ontime GROUP BY Year ORDER BY Year; From 50fb0ec9f6fb87952e622fd56d02119c3a6383fe Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Mar 2020 23:01:36 +0300 Subject: [PATCH 040/115] Add links to changelogs from other years. (#9710) * Add links to changelogs from other years. * Move some changelogs to other folders. * Fix links * Renames and fixes. 
--- docs/en/changelog.md | 1 - CHANGELOG_2017.md => docs/en/changelog/2017.md | 0 CHANGELOG_2018.md => docs/en/changelog/2018.md | 0 CHANGELOG_2019.md => docs/en/changelog/2019.md | 0 docs/en/changelog/index.md | 1 + docs/fa/changelog.md | 1 - docs/fa/changelog/2017.md | 1 + docs/fa/changelog/2018.md | 1 + docs/fa/changelog/2019.md | 1 + docs/fa/changelog/index.md | 1 + docs/ja/changelog.md | 1 - docs/ja/changelog/2017.md | 1 + docs/ja/changelog/2018.md | 1 + docs/ja/changelog/2019.md | 1 + docs/ja/changelog/index.md | 1 + docs/ru/changelog.md | 1 - docs/ru/changelog/2017.md | 1 + docs/ru/changelog/2018.md | 1 + docs/ru/changelog/2019.md | 1 + docs/ru/changelog/index.md | 1 + docs/toc_en.yml | 6 +++++- docs/toc_fa.yml | 6 +++++- docs/toc_ja.yml | 6 +++++- docs/toc_ru.yml | 6 +++++- docs/toc_zh.yml | 6 +++++- docs/zh/changelog.md | 1 - docs/zh/changelog/2017.md | 1 + docs/zh/changelog/2018.md | 1 + docs/zh/changelog/2019.md | 1 + docs/zh/changelog/index.md | 1 + 30 files changed, 42 insertions(+), 10 deletions(-) delete mode 120000 docs/en/changelog.md rename CHANGELOG_2017.md => docs/en/changelog/2017.md (100%) rename CHANGELOG_2018.md => docs/en/changelog/2018.md (100%) rename CHANGELOG_2019.md => docs/en/changelog/2019.md (100%) create mode 120000 docs/en/changelog/index.md delete mode 120000 docs/fa/changelog.md create mode 120000 docs/fa/changelog/2017.md create mode 120000 docs/fa/changelog/2018.md create mode 120000 docs/fa/changelog/2019.md create mode 120000 docs/fa/changelog/index.md delete mode 120000 docs/ja/changelog.md create mode 120000 docs/ja/changelog/2017.md create mode 120000 docs/ja/changelog/2018.md create mode 120000 docs/ja/changelog/2019.md create mode 120000 docs/ja/changelog/index.md delete mode 120000 docs/ru/changelog.md create mode 120000 docs/ru/changelog/2017.md create mode 120000 docs/ru/changelog/2018.md create mode 120000 docs/ru/changelog/2019.md create mode 120000 docs/ru/changelog/index.md delete mode 120000 docs/zh/changelog.md create mode 120000 docs/zh/changelog/2017.md create mode 120000 docs/zh/changelog/2018.md create mode 120000 docs/zh/changelog/2019.md create mode 120000 docs/zh/changelog/index.md diff --git a/docs/en/changelog.md b/docs/en/changelog.md deleted file mode 120000 index 699cc9e7b7c..00000000000 --- a/docs/en/changelog.md +++ /dev/null @@ -1 +0,0 @@ -../../CHANGELOG.md \ No newline at end of file diff --git a/CHANGELOG_2017.md b/docs/en/changelog/2017.md similarity index 100% rename from CHANGELOG_2017.md rename to docs/en/changelog/2017.md diff --git a/CHANGELOG_2018.md b/docs/en/changelog/2018.md similarity index 100% rename from CHANGELOG_2018.md rename to docs/en/changelog/2018.md diff --git a/CHANGELOG_2019.md b/docs/en/changelog/2019.md similarity index 100% rename from CHANGELOG_2019.md rename to docs/en/changelog/2019.md diff --git a/docs/en/changelog/index.md b/docs/en/changelog/index.md new file mode 120000 index 00000000000..79b747aee1b --- /dev/null +++ b/docs/en/changelog/index.md @@ -0,0 +1 @@ +../../../CHANGELOG.md \ No newline at end of file diff --git a/docs/fa/changelog.md b/docs/fa/changelog.md deleted file mode 120000 index 699cc9e7b7c..00000000000 --- a/docs/fa/changelog.md +++ /dev/null @@ -1 +0,0 @@ -../../CHANGELOG.md \ No newline at end of file diff --git a/docs/fa/changelog/2017.md b/docs/fa/changelog/2017.md new file mode 120000 index 00000000000..bf4fe14279d --- /dev/null +++ b/docs/fa/changelog/2017.md @@ -0,0 +1 @@ +../../en/changelog/2017.md \ No newline at end of file diff --git 
a/docs/fa/changelog/2018.md b/docs/fa/changelog/2018.md new file mode 120000 index 00000000000..20799251f43 --- /dev/null +++ b/docs/fa/changelog/2018.md @@ -0,0 +1 @@ +../../en/changelog/2018.md \ No newline at end of file diff --git a/docs/fa/changelog/2019.md b/docs/fa/changelog/2019.md new file mode 120000 index 00000000000..105ca144fca --- /dev/null +++ b/docs/fa/changelog/2019.md @@ -0,0 +1 @@ +../../en/changelog/2019.md \ No newline at end of file diff --git a/docs/fa/changelog/index.md b/docs/fa/changelog/index.md new file mode 120000 index 00000000000..79b747aee1b --- /dev/null +++ b/docs/fa/changelog/index.md @@ -0,0 +1 @@ +../../../CHANGELOG.md \ No newline at end of file diff --git a/docs/ja/changelog.md b/docs/ja/changelog.md deleted file mode 120000 index 699cc9e7b7c..00000000000 --- a/docs/ja/changelog.md +++ /dev/null @@ -1 +0,0 @@ -../../CHANGELOG.md \ No newline at end of file diff --git a/docs/ja/changelog/2017.md b/docs/ja/changelog/2017.md new file mode 120000 index 00000000000..bf4fe14279d --- /dev/null +++ b/docs/ja/changelog/2017.md @@ -0,0 +1 @@ +../../en/changelog/2017.md \ No newline at end of file diff --git a/docs/ja/changelog/2018.md b/docs/ja/changelog/2018.md new file mode 120000 index 00000000000..20799251f43 --- /dev/null +++ b/docs/ja/changelog/2018.md @@ -0,0 +1 @@ +../../en/changelog/2018.md \ No newline at end of file diff --git a/docs/ja/changelog/2019.md b/docs/ja/changelog/2019.md new file mode 120000 index 00000000000..105ca144fca --- /dev/null +++ b/docs/ja/changelog/2019.md @@ -0,0 +1 @@ +../../en/changelog/2019.md \ No newline at end of file diff --git a/docs/ja/changelog/index.md b/docs/ja/changelog/index.md new file mode 120000 index 00000000000..79b747aee1b --- /dev/null +++ b/docs/ja/changelog/index.md @@ -0,0 +1 @@ +../../../CHANGELOG.md \ No newline at end of file diff --git a/docs/ru/changelog.md b/docs/ru/changelog.md deleted file mode 120000 index 699cc9e7b7c..00000000000 --- a/docs/ru/changelog.md +++ /dev/null @@ -1 +0,0 @@ -../../CHANGELOG.md \ No newline at end of file diff --git a/docs/ru/changelog/2017.md b/docs/ru/changelog/2017.md new file mode 120000 index 00000000000..bf4fe14279d --- /dev/null +++ b/docs/ru/changelog/2017.md @@ -0,0 +1 @@ +../../en/changelog/2017.md \ No newline at end of file diff --git a/docs/ru/changelog/2018.md b/docs/ru/changelog/2018.md new file mode 120000 index 00000000000..20799251f43 --- /dev/null +++ b/docs/ru/changelog/2018.md @@ -0,0 +1 @@ +../../en/changelog/2018.md \ No newline at end of file diff --git a/docs/ru/changelog/2019.md b/docs/ru/changelog/2019.md new file mode 120000 index 00000000000..105ca144fca --- /dev/null +++ b/docs/ru/changelog/2019.md @@ -0,0 +1 @@ +../../en/changelog/2019.md \ No newline at end of file diff --git a/docs/ru/changelog/index.md b/docs/ru/changelog/index.md new file mode 120000 index 00000000000..79b747aee1b --- /dev/null +++ b/docs/ru/changelog/index.md @@ -0,0 +1 @@ +../../../CHANGELOG.md \ No newline at end of file diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 9764943ed7a..05ea0850f69 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -242,7 +242,11 @@ nav: - 'What''s New': - 'Roadmap': 'roadmap.md' - - 'Changelog': 'changelog.md' + - 'Changelog': + - '2020': 'changelog/index.md' + - '2019': 'changelog/2019.md' + - '2018': 'changelog/2018.md' + - '2017': 'changelog/2017.md' - 'Security Changelog': 'security_changelog.md' - 'F.A.Q.': diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index f963aa6ba3e..965f3dbf95c 100644 --- a/docs/toc_fa.yml +++ 
b/docs/toc_fa.yml @@ -241,5 +241,9 @@ nav: - 'What''s New': - 'Roadmap': 'roadmap.md' - - 'Changelog': 'changelog.md' + - 'Changelog': + - '2020': 'changelog/index.md' + - '2019': 'changelog/2019.md' + - '2018': 'changelog/2018.md' + - '2017': 'changelog/2017.md' - 'Security Changelog': 'security_changelog.md' diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index 5884de98a3d..c385d08a581 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -239,7 +239,11 @@ nav: - 'What''s New': - 'Roadmap': 'roadmap.md' - - 'Changelog': 'changelog.md' + - 'Changelog': + - '2020': 'changelog/index.md' + - '2019': 'changelog/2019.md' + - '2018': 'changelog/2018.md' + - '2017': 'changelog/2017.md' - 'Security Changelog': 'security_changelog.md' - 'F.A.Q.': diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index d44f7e2cb4b..4f028a4463f 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -238,7 +238,11 @@ nav: - 'Сторонние библиотеки': 'development/contrib.md' - 'Что нового': - - 'Changelog': 'changelog.md' + - 'Changelog': + - '2020': 'changelog/index.md' + - '2019': 'changelog/2019.md' + - '2018': 'changelog/2018.md' + - '2017': 'changelog/2017.md' - 'Security changelog': 'security_changelog.md' - 'Roadmap': 'roadmap.md' - 'Подробный roadmap 2020': 'extended_roadmap.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 054ee192e53..fd28d056a98 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -241,5 +241,9 @@ nav: - '新功能特性': - '路线图': 'roadmap.md' - - '更新日志': 'changelog.md' + - '更新日志': + - '2020': 'changelog/index.md' + - '2019': 'changelog/2019.md' + - '2018': 'changelog/2018.md' + - '2017': 'changelog/2017.md' - '安全更改日志': 'security_changelog.md' diff --git a/docs/zh/changelog.md b/docs/zh/changelog.md deleted file mode 120000 index 699cc9e7b7c..00000000000 --- a/docs/zh/changelog.md +++ /dev/null @@ -1 +0,0 @@ -../../CHANGELOG.md \ No newline at end of file diff --git a/docs/zh/changelog/2017.md b/docs/zh/changelog/2017.md new file mode 120000 index 00000000000..bf4fe14279d --- /dev/null +++ b/docs/zh/changelog/2017.md @@ -0,0 +1 @@ +../../en/changelog/2017.md \ No newline at end of file diff --git a/docs/zh/changelog/2018.md b/docs/zh/changelog/2018.md new file mode 120000 index 00000000000..20799251f43 --- /dev/null +++ b/docs/zh/changelog/2018.md @@ -0,0 +1 @@ +../../en/changelog/2018.md \ No newline at end of file diff --git a/docs/zh/changelog/2019.md b/docs/zh/changelog/2019.md new file mode 120000 index 00000000000..105ca144fca --- /dev/null +++ b/docs/zh/changelog/2019.md @@ -0,0 +1 @@ +../../en/changelog/2019.md \ No newline at end of file diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md new file mode 120000 index 00000000000..79b747aee1b --- /dev/null +++ b/docs/zh/changelog/index.md @@ -0,0 +1 @@ +../../../CHANGELOG.md \ No newline at end of file From 458b724600000a29773a0b64a422bea2d7b3d7e9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 17 Mar 2020 23:27:42 +0300 Subject: [PATCH 041/115] fixpu --- dbms/tests/performance/synthetic_hardware_benchmark.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/performance/synthetic_hardware_benchmark.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml index 41ac7ef1dae..a56fb085141 100644 --- a/dbms/tests/performance/synthetic_hardware_benchmark.xml +++ b/dbms/tests/performance/synthetic_hardware_benchmark.xml @@ -55,8 +55,8 @@ SELECT number % 10000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null SELECT number % 10000000 AS k, count() FROM 
numbers_mt(80000000) GROUP BY k FORMAT Null
-SELECT number % 500000000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null
-SELECT number % 500000000 AS k, count() FROM numbers_mt(800000000) GROUP BY k FORMAT Null
+SELECT number % 100000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null
+SELECT number % 100000000 AS k, count() FROM numbers_mt(80000000) GROUP BY k FORMAT Null

From 67ce2fbf8ae93e79df35a1daa81d3ba5bb6a83f8 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 17 Mar 2020 23:38:32 +0300
Subject: [PATCH 042/115] Update CHANGELOG.md

---
 CHANGELOG.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bbddfd47917..352ceddb0b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,12 +3,12 @@
 ### ClickHouse release v20.3.3.6, 2020-03-17

 ### Bug Fix
-* Fixed incorrect internal function names for `sumKahan` and `sumWithOverflow`. I lead to exception while using this functions in remote queries. [#9636](https://github.com/ClickHouse/ClickHouse/pull/9636) ([Azat Khuzhin](https://github.com/azat))
-* Fixed the issue: timezone was not preserved if you write a simple arithmetic expression like `time + 1` (in contrast to an expression like `time + INTERVAL 1 SECOND`). This fixes [#5743](https://github.com/ClickHouse/ClickHouse/issues/5743). [#9323](https://github.com/ClickHouse/ClickHouse/pull/9323) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add setting `use_compact_format_in_distributed_parts_names` which allows writing files for `INSERT` queries into a `Distributed` table in a more compact format. This fixes [#9647](https://github.com/ClickHouse/ClickHouse/issues/9647). [#9653](https://github.com/ClickHouse/ClickHouse/pull/9653) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+* Fix a bug in replication that doesn't allow replication to work if the user has executed mutations on the previous version. This fixes [#9645](https://github.com/ClickHouse/ClickHouse/issues/9645). [#9652](https://github.com/ClickHouse/ClickHouse/pull/9652) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+* Fixed incorrect internal function names for `sumKahan` and `sumWithOverflow`. It led to an exception when using these functions in remote queries. [#9636](https://github.com/ClickHouse/ClickHouse/pull/9636) ([Azat Khuzhin](https://github.com/azat)). This issue was in all ClickHouse releases.
+* Fixed the issue: timezone was not preserved if you write a simple arithmetic expression like `time + 1` (in contrast to an expression like `time + INTERVAL 1 SECOND`). This fixes [#5743](https://github.com/ClickHouse/ClickHouse/issues/5743). [#9323](https://github.com/ClickHouse/ClickHouse/pull/9323) ([alexey-milovidov](https://github.com/alexey-milovidov)). This issue was in all ClickHouse releases.
 * Fix possible exceptions `Size of filter doesn't match size of column` and `Invalid number of rows in Chunk` in `MergeTreeRangeReader`. They could appear while executing `PREWHERE` in some cases. Fixes [#9132](https://github.com/ClickHouse/ClickHouse/issues/9132). [#9612](https://github.com/ClickHouse/ClickHouse/pull/9612) ([Anton Popov](https://github.com/CurtizJ))
-* Allow `ALTER ON CLUSTER` of `Distributed` tables with internal replication. This fixes [#3268](https://github.com/ClickHouse/ClickHouse/issues/3268).
[#9617](https://github.com/ClickHouse/ClickHouse/pull/9617) ([shinoi2](https://github.com/shinoi2)) -* Fix bug in a replication that doesn't allow replication to work if the user has executed mutations on the previous version. This fixes [#9645](https://github.com/ClickHouse/ClickHouse/issues/9645). [#9652](https://github.com/ClickHouse/ClickHouse/pull/9652) ([alesapin](https://github.com/alesapin)) -* Add setting `use_compact_format_in_distributed_parts_names` which allows to write files for `INSERT` queries into `Distributed` table with more compact format. This fixes [#9647](https://github.com/ClickHouse/ClickHouse/issues/9647). [#9653](https://github.com/ClickHouse/ClickHouse/pull/9653) ([alesapin](https://github.com/alesapin)) +* Allow `ALTER ON CLUSTER` of `Distributed` tables with internal replication. This fixes [#3268](https://github.com/ClickHouse/ClickHouse/issues/3268). [#9617](https://github.com/ClickHouse/ClickHouse/pull/9617) ([shinoi2](https://github.com/shinoi2)). This issue was in all ClickHouse releases. ### ClickHouse release v20.3.2.1, 2020-03-12 From f0f532fd091c51b65c28bb870d7fa629330fec95 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 17 Mar 2020 23:46:43 +0300 Subject: [PATCH 043/115] Minor update on roadmap --- docs/ru/extended_roadmap.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 2dc1b36fd42..f8950c238ec 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -1846,7 +1846,7 @@ Amos Bird, но его решение слишком громоздкое и п Требуется проработать вопрос безопасности и изоляции инстансов (поднятие в контейнерах с ограничениями по сети), подключение тестовых датасетов с помощью copy-on-write файловой системы; органичения ресурсов. -Есть минимальный прототип. +Есть минимальный прототип. Сделал Илья Яцишин. Этот прототип работает только с одной версией ClickHouse и не позволяет делиться ссылками на результаты запросов. ### 25.17. Взаимодействие с ВУЗами: ВШЭ, УрФУ, ICT Beijing. @@ -1886,6 +1886,6 @@ Amos Bird, но его решение слишком громоздкое и п UPD: не участвуем. -### 25.27. Обновить сайт ClickHouse. +### 25.27. + Обновить сайт ClickHouse. -Иван Блинков. Нет рисков. Нужно для Яндекс.Облака. +Иван Блинков. Нет рисков. Нужно для Яндекс.Облака. Upd. Сделано. From f64549b27a3f665b8444cc86e1282dfe9349c687 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 17 Mar 2020 23:56:26 +0300 Subject: [PATCH 044/115] Trigger rebuild with whitespace update --- docker/test/stateless/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 6e5870a3560..017b53036cf 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-stateless-test . +# docker build -t yandex/clickhouse-stateless-test . 
FROM yandex/clickhouse-deb-builder ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" From dbc08d7644b6f7afdc91937b7f835907a603cc71 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Wed, 18 Mar 2020 00:56:47 +0300 Subject: [PATCH 045/115] Add DATE and TIMESTAMP operators (#9691) * DATE and TIMESTAMP operators --- dbms/src/Parsers/ExpressionListParsers.cpp | 63 +++++++++++++++++++ dbms/src/Parsers/ExpressionListParsers.h | 28 +++++++-- .../0_stateless/01053_if_chain_check.sql | 4 +- .../01062_max_parser_depth.reference | 2 +- .../0_stateless/01062_max_parser_depth.sh | 4 +- .../0_stateless/01095_tpch_like_smoke.sql | 42 ++++++------- ...099_operators_date_and_timestamp.reference | 14 +++++ .../01099_operators_date_and_timestamp.sql | 31 +++++++++ 8 files changed, 157 insertions(+), 31 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference create mode 100644 dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index 58e4a4e9319..a967ae19691 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -595,6 +595,69 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return true; } +bool ParserDateOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + + /// If no DATE keyword, go to the nested parser. + if (!ParserKeyword("DATE").ignore(pos, expected)) + return next_parser.parse(pos, node, expected); + + ASTPtr expr; + if (!ParserStringLiteral().parse(pos, expr, expected)) + { + pos = begin; + return next_parser.parse(pos, node, expected); + } + + /// the function corresponding to the operator + auto function = std::make_shared(); + + /// function arguments + auto exp_list = std::make_shared(); + + /// the first argument of the function is the previous element, the second is the next one + function->name = "toDate"; + function->arguments = exp_list; + function->children.push_back(exp_list); + + exp_list->children.push_back(expr); + + node = function; + return true; +} + +bool ParserTimestampOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + + /// If no TIMESTAMP keyword, go to the nested parser. 
+ if (!ParserKeyword("TIMESTAMP").ignore(pos, expected)) + return next_parser.parse(pos, node, expected); + + ASTPtr expr; + if (!ParserStringLiteral().parse(pos, expr, expected)) + { + pos = begin; + return next_parser.parse(pos, node, expected); + } + + /// the function corresponding to the operator + auto function = std::make_shared(); + + /// function arguments + auto exp_list = std::make_shared(); + + /// the first argument of the function is the previous element, the second is the next one + function->name = "toDateTime"; + function->arguments = exp_list; + function->children.push_back(exp_list); + + exp_list->children.push_back(expr); + + node = function; + return true; +} bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/dbms/src/Parsers/ExpressionListParsers.h b/dbms/src/Parsers/ExpressionListParsers.h index b0909fe8bf7..0cef29b6d67 100644 --- a/dbms/src/Parsers/ExpressionListParsers.h +++ b/dbms/src/Parsers/ExpressionListParsers.h @@ -159,7 +159,7 @@ private: ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique()}; protected: - const char * getName() const override{ return "multiplicative expression"; } + const char * getName() const override { return "multiplicative expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { @@ -167,18 +167,36 @@ protected: } }; +/// DATE operator. "DATE '2001-01-01'" would be parsed as "toDate('2001-01-01')". +class ParserDateOperatorExpression : public IParserBase +{ +protected: + ParserMultiplicativeExpression next_parser; + + const char * getName() const override { return "DATE operator expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +/// TIMESTAMP operator. "TIMESTAMP '2001-01-01 12:34:56'" would be parsed as "toDateTime('2001-01-01 12:34:56')". +class ParserTimestampOperatorExpression : public IParserBase +{ +protected: + ParserDateOperatorExpression next_parser; + + const char * getName() const override { return "TIMESTAMP operator expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; /// Optional conversion to INTERVAL data type. Example: "INTERVAL x SECOND" parsed as "toIntervalSecond(x)". class ParserIntervalOperatorExpression : public IParserBase { protected: - ParserMultiplicativeExpression next_parser; + ParserTimestampOperatorExpression next_parser; - const char * getName() const override{ return "INTERVAL operator expression"; } + const char * getName() const override { return "INTERVAL operator expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; - class ParserAdditiveExpression : public IParserBase { private: @@ -186,7 +204,7 @@ private: ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique()}; protected: - const char * getName() const override{ return "additive expression"; } + const char * getName() const override { return "additive expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { diff --git a/dbms/tests/queries/0_stateless/01053_if_chain_check.sql b/dbms/tests/queries/0_stateless/01053_if_chain_check.sql index 3a98b85c473..ee5ccf588ee 100644 --- a/dbms/tests/queries/0_stateless/01053_if_chain_check.sql +++ b/dbms/tests/queries/0_stateless/01053_if_chain_check.sql @@ -1,3 +1,3 @@ +SET max_parser_depth = 4000; SELECT x FROM (SELECT number % 16 = 0 ? nan : (number % 24 = 0 ? NULL : (number % 37 = 0 ? 
nan : (number % 34 = 0 ? nan : (number % 3 = 0 ? NULL : (number % 68 = 0 ? 42 : (number % 28 = 0 ? nan : (number % 46 = 0 ? nan : (number % 13 = 0 ? nan : (number % 27 = 0 ? NULL : (number % 39 = 0 ? NULL : (number % 27 = 0 ? NULL : (number % 30 = 0 ? NULL : (number % 72 = 0 ? NULL : (number % 36 = 0 ? NULL : (number % 51 = 0 ? NULL : (number % 58 = 0 ? nan : (number % 26 = 0 ? 42 : (number % 13 = 0 ? nan : (number % 12 = 0 ? NULL : (number % 22 = 0 ? nan : (number % 36 = 0 ? NULL : (number % 63 = 0 ? NULL : (number % 27 = 0 ? NULL : (number % 18 = 0 ? NULL : (number % 69 = 0 ? NULL : (number % 76 = 0 ? nan : (number % 42 = 0 ? NULL : (number % 9 = 0 ? NULL : (toFloat64(number)))))))))))))))))))))))))))))) AS x FROM system.numbers LIMIT 1001) ORDER BY x ASC NULLS FIRST; - -SELECT x FROM (SELECT number % 22 = 0 ? nan : (number % 56 = 0 ? 42 : (number % 45 = 0 ? NULL : (number % 47 = 0 ? 42 : (number % 39 = 0 ? NULL : (number % 1 = 0 ? nan : (number % 43 = 0 ? nan : (number % 40 = 0 ? nan : (number % 42 = 0 ? NULL : (number % 26 = 0 ? 42 : (number % 41 = 0 ? 42 : (number % 6 = 0 ? NULL : (number % 39 = 0 ? NULL : (number % 34 = 0 ? nan : (number % 74 = 0 ? 42 : (number % 40 = 0 ? nan : (number % 37 = 0 ? nan : (number % 51 = 0 ? NULL : (number % 46 = 0 ? nan : (toFloat64(number)))))))))))))))))))) AS x FROM system.numbers LIMIT 1001) ORDER BY x ASC NULLS FIRST; \ No newline at end of file +SELECT x FROM (SELECT number % 22 = 0 ? nan : (number % 56 = 0 ? 42 : (number % 45 = 0 ? NULL : (number % 47 = 0 ? 42 : (number % 39 = 0 ? NULL : (number % 1 = 0 ? nan : (number % 43 = 0 ? nan : (number % 40 = 0 ? nan : (number % 42 = 0 ? NULL : (number % 26 = 0 ? 42 : (number % 41 = 0 ? 42 : (number % 6 = 0 ? NULL : (number % 39 = 0 ? NULL : (number % 34 = 0 ? nan : (number % 74 = 0 ? 42 : (number % 40 = 0 ? nan : (number % 37 = 0 ? nan : (number % 51 = 0 ? NULL : (number % 46 = 0 ? nan : (toFloat64(number)))))))))))))))))))) AS x FROM system.numbers LIMIT 1001) ORDER BY x ASC NULLS FIRST; diff --git a/dbms/tests/queries/0_stateless/01062_max_parser_depth.reference b/dbms/tests/queries/0_stateless/01062_max_parser_depth.reference index 590b981971b..3efc4f06710 100644 --- a/dbms/tests/queries/0_stateless/01062_max_parser_depth.reference +++ b/dbms/tests/queries/0_stateless/01062_max_parser_depth.reference @@ -1,4 +1,4 @@ - -Maximum parse depth (40) exceeded. +Maximum parse depth (42) exceeded. - Maximum parse depth (20) exceeded. diff --git a/dbms/tests/queries/0_stateless/01062_max_parser_depth.sh b/dbms/tests/queries/0_stateless/01062_max_parser_depth.sh index 17816db4758..ac2ce338366 100755 --- a/dbms/tests/queries/0_stateless/01062_max_parser_depth.sh +++ b/dbms/tests/queries/0_stateless/01062_max_parser_depth.sh @@ -3,8 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=40" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." +echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=42" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." echo - -echo 'select (1+1)*(2+1)' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=40" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." +echo 'select (1+1)*(2+1)' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=42" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." 
echo - echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=20" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." diff --git a/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql b/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql index ae43a9cfc28..074889d45ae 100644 --- a/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql +++ b/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql @@ -129,7 +129,7 @@ select from lineitem where - l_shipdate <= toDate('1998-12-01') - interval 90 day + l_shipdate <= date '1998-12-01' - interval 90 day group by l_returnflag, l_linestatus @@ -197,8 +197,8 @@ where c_mktsegment = 'BUILDING' and c_custkey = o_custkey and l_orderkey = o_orderkey - and o_orderdate < toDate('1995-03-15') - and l_shipdate > toDate('1995-03-15') + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' group by l_orderkey, o_orderdate, @@ -215,8 +215,8 @@ limit 10; -- from -- orders -- where --- o_orderdate >= toDate('1993-07-01') --- and o_orderdate < toDate('1993-07-01') + interval '3' month +-- o_orderdate >= date '1993-07-01' +-- and o_orderdate < date '1993-07-01' + interval '3' month -- and exists ( -- select -- * @@ -250,8 +250,8 @@ where and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'ASIA' - and o_orderdate >= toDate('1994-01-01') - and o_orderdate < toDate('1994-01-01') + interval '1' year + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year group by n_name order by @@ -263,8 +263,8 @@ select from lineitem where - l_shipdate >= toDate('1994-01-01') - and l_shipdate < toDate('1994-01-01') + interval '1' year + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year and l_discount between toDecimal32(0.06, 2) - toDecimal32(0.01, 2) and toDecimal32(0.06, 2) + toDecimal32(0.01, 2) and l_quantity < 24; @@ -299,7 +299,7 @@ where -- (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') -- or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') -- ) --- and l_shipdate between toDate('1995-01-01') and toDate('1996-12-31') +-- and l_shipdate between date '1995-01-01' and date '1996-12-31' -- ) as shipping -- group by -- supp_nation, @@ -341,7 +341,7 @@ where -- and n1.n_regionkey = r_regionkey -- and r_name = 'AMERICA' -- and s_nationkey = n2.n_nationkey --- and o_orderdate between toDate('1995-01-01') and toDate('1996-12-31') +-- and o_orderdate between date '1995-01-01' and date '1996-12-31' -- and p_type = 'ECONOMY ANODIZED STEEL' -- ) as all_nations -- group by @@ -401,8 +401,8 @@ from where c_custkey = o_custkey and l_orderkey = o_orderkey - and o_orderdate >= toDate('1993-10-01') - and o_orderdate < toDate('1993-10-01') + interval '3' month + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month and l_returnflag = 'R' and c_nationkey = n_nationkey group by @@ -472,8 +472,8 @@ where and l_shipmode in ('MAIL', 'SHIP') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate - and l_receiptdate >= toDate('1994-01-01') - and l_receiptdate < toDate('1994-01-01') + interval '1' year + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year group by l_shipmode order by @@ -513,8 +513,8 @@ from part where l_partkey = p_partkey - and l_shipdate >= toDate('1995-09-01') - and l_shipdate < toDate('1995-09-01') + interval '1' month; + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month; -- select 
15; -- create view revenue0 as @@ -524,8 +524,8 @@ where -- from -- lineitem -- where --- l_shipdate >= toDate('1996-01-01') --- and l_shipdate < toDate('1996-01-01') + interval '3' month +-- l_shipdate >= date '1996-01-01' +-- and l_shipdate < date '1996-01-01' + interval '3' month -- group by -- l_suppkey; -- select @@ -702,8 +702,8 @@ where -- where -- l_partkey = ps_partkey -- and l_suppkey = ps_suppkey --- and l_shipdate >= toDate('1994-01-01') --- and l_shipdate < toDate('1994-01-01') + interval '1' year +-- and l_shipdate >= date '1994-01-01' +-- and l_shipdate < date '1994-01-01' + interval '1' year -- ) -- ) -- and s_nationkey = n_nationkey diff --git a/dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference b/dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference new file mode 100644 index 00000000000..103053a19f0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference @@ -0,0 +1,14 @@ +1 1 1 +1 1 1 +1 1 +2001-09-28 +2001-10-05 Date +2001-09-24 Date +2001-10-05 Date +2001-09-24 Date +2001-09-28 01:00:00 DateTime +2001-09-27 23:00:00 DateTime +3 Int32 +2001-09-29 00:00:00 +2001-09-28 00:00:00 +140400 Int32 diff --git a/dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql b/dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql new file mode 100644 index 00000000000..f52d2b774c1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql @@ -0,0 +1,31 @@ +select interval 1 second, interval 1 minute, interval 1 hour; +select interval 1 day, interval 1 week, interval 1 month; +select interval 1 quarter, interval 1 year; + +select date '2001-09-28'; +select (date '2001-09-28' + interval 7 day) x, toTypeName(x); +select (date '2001-10-01' - interval 7 day) x, toTypeName(x); +select (date '2001-09-28' + 7) x, toTypeName(x); +select (date '2001-10-01' - 7) x, toTypeName(x); +select (date '2001-09-28' + interval 1 hour) x, toTypeName(x); +select (date '2001-09-28' - interval 1 hour) x, toTypeName(x); +select (date '2001-10-01' - date '2001-09-28') x, toTypeName(x); +select timestamp '2001-09-28 01:00:00' + interval 23 hour; +select timestamp '2001-09-28 23:00:00' - interval 23 hour; + +-- TODO: return interval +select (timestamp '2001-09-29 03:00:00' - timestamp '2001-09-27 12:00:00') x, toTypeName(x); -- interval '1 day 15:00:00' + +-- select -interval 23 hour; -- interval '-23:00:00' +-- select interval 1 day + interval 1 hour; -- interval '1 day 01:00:00' +-- select interval '1 day' - interval '1 hour'; -- interval '1 day -01:00:00' + +-- select date '2001-09-28' + time '03:00'; -- timestamp '2001-09-28 03:00:00' +-- select time '01:00' + interval '3 hours'; -- time '04:00:00' +-- select time '05:00' - time '03:00'; -- interval '02:00:00' +-- select time '05:00' - interval '2 hours'; -- time '03:00:00' + +-- select 900 * interval '1 second'; -- interval '00:15:00' +-- select (21 * interval '1 day') x, toTypeName(x); -- interval '21 days' +-- select (double precision '3.5' * interval '1 hour') x, toTypeName(x); -- interval '03:30:00' +-- select (interval '1 hour' / double precision '1.5') x, toTypeName(x); -- interval '00:40:00' From 3ec3732d993a80c0965f460470b30eecc0688f15 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Mar 2020 03:15:05 +0300 Subject: [PATCH 046/115] Update synthetic_hardware_benchmark.xml --- dbms/tests/performance/synthetic_hardware_benchmark.xml | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/tests/performance/synthetic_hardware_benchmark.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml index a56fb085141..cb118ca0f5a 100644 --- a/dbms/tests/performance/synthetic_hardware_benchmark.xml +++ b/dbms/tests/performance/synthetic_hardware_benchmark.xml @@ -55,8 +55,10 @@ SELECT number % 10000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null SELECT number % 10000000 AS k, count() FROM numbers_mt(80000000) GROUP BY k FORMAT Null -SELECT number % 100000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null -SELECT number % 100000000 AS k, count() FROM numbers_mt(80000000) GROUP BY k FORMAT Null + +SELECT number % toUInt32(1e8) AS k, count() FROM numbers_mt(toUInt32(4e8)) GROUP BY k FORMAT Null From 2510aed7b5da13c94e91ffcc0404a8ce4db20642 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 18 Mar 2020 03:33:32 +0300 Subject: [PATCH 047/115] More stable perf tests --- dbms/tests/performance/arithmetic.xml | 11 ++++++++++- dbms/tests/performance/general_purpose_hashes.xml | 8 ++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dbms/tests/performance/arithmetic.xml b/dbms/tests/performance/arithmetic.xml index a2e7c654fc8..30b7707a2f2 100644 --- a/dbms/tests/performance/arithmetic.xml +++ b/dbms/tests/performance/arithmetic.xml @@ -62,7 +62,16 @@ ; - SELECT count() FROM nums WHERE NOT ignore({op}({arg}, {arg})) + SELECT count() FROM nums WHERE NOT ignore( + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})), + identity({op}({arg}, {arg})) + ) DROP TABLE nums diff --git a/dbms/tests/performance/general_purpose_hashes.xml b/dbms/tests/performance/general_purpose_hashes.xml index 4746271d664..458e646f3a7 100644 --- a/dbms/tests/performance/general_purpose_hashes.xml +++ b/dbms/tests/performance/general_purpose_hashes.xml @@ -47,15 +47,15 @@ table_slow - zeros(100000) - zeros_mt(1000000) + zeros(1000000) + zeros_mt(10000000) table - numbers(10000000) - numbers_mt(100000000) + numbers(100000000) + numbers_mt(1000000000) From 1bd7e594b00dc3a93399d04ca1994613fa354e33 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 03:57:00 +0300 Subject: [PATCH 048/115] clang-tidy, part 2 --- .clang-tidy | 63 +++++- base/daemon/BaseDaemon.cpp | 44 +++-- base/daemon/BaseDaemon.h | 8 +- dbms/programs/client/Client.cpp | 6 +- dbms/programs/copier/ClusterCopier.cpp | 5 +- dbms/programs/copier/ClusterCopier.h | 5 - dbms/programs/local/LocalServer.cpp | 32 +-- dbms/programs/local/LocalServer.h | 4 - dbms/programs/obfuscator/Obfuscator.cpp | 6 +- .../performance-test/ConfigPreprocessor.cpp | 2 +- .../performance-test/ConfigPreprocessor.h | 4 +- .../performance-test/ReportBuilder.cpp | 4 +- .../programs/performance-test/ReportBuilder.h | 10 +- dbms/programs/server/HTTPHandler.h | 2 +- dbms/src/Access/AccessRights.cpp | 5 +- dbms/src/Access/IAccessStorage.cpp | 2 +- dbms/src/Access/IAccessStorage.h | 5 +- dbms/src/Columns/ColumnAggregateFunction.cpp | 5 +- dbms/src/Columns/ColumnLowCardinality.cpp | 13 +- dbms/src/Columns/ColumnLowCardinality.h | 2 - dbms/src/Common/Config/ConfigProcessor.cpp | 2 +- dbms/src/Common/Config/ConfigProcessor.h | 6 +- dbms/src/Common/UTF8Helpers.cpp | 5 - .../tests/gtest_compressionCodec.cpp | 4 +- dbms/src/Core/SettingsCollection.cpp | 2 +- dbms/src/Core/SettingsCollection.h | 2 
+- .../AddingDefaultsBlockInputStream.cpp | 186 +++++++++--------- .../AddingDefaultsBlockInputStream.h | 6 - .../SummingSortedBlockInputStream.cpp | 166 ++++++++-------- .../SummingSortedBlockInputStream.h | 17 +- .../Embedded/GeoDictionariesLoader.h | 5 +- dbms/src/Dictionaries/TrieDictionary.cpp | 23 +-- dbms/src/Dictionaries/TrieDictionary.h | 2 - dbms/src/Disks/DiskS3.cpp | 17 +- dbms/src/Disks/DiskS3.h | 2 - dbms/src/Functions/URL/URLHierarchy.cpp | 2 +- dbms/src/Functions/URL/URLPathHierarchy.cpp | 2 +- .../URL/extractURLParameterNames.cpp | 2 +- .../Functions/URL/extractURLParameters.cpp | 2 +- dbms/src/Functions/array/arrayDistinct.cpp | 6 +- dbms/src/Functions/array/arrayElement.cpp | 14 +- dbms/src/Functions/array/arrayReverse.cpp | 8 +- dbms/src/Functions/blockSerializedSize.cpp | 2 +- dbms/src/Functions/convertCharset.cpp | 4 +- dbms/src/Functions/if.cpp | 4 +- .../Functions/reinterpretAsFixedString.cpp | 4 +- dbms/src/Functions/reinterpretAsString.cpp | 2 +- dbms/src/Functions/transform.cpp | 2 +- dbms/src/IO/S3Common.cpp | 2 +- dbms/src/IO/WriteBufferValidUTF8.cpp | 2 +- dbms/src/IO/parseDateTimeBestEffort.cpp | 4 +- dbms/src/Interpreters/Aggregator.cpp | 2 +- dbms/src/Interpreters/Aggregator.h | 4 +- dbms/src/Interpreters/CatBoostModel.cpp | 10 +- dbms/src/Interpreters/DDLWorker.cpp | 2 +- dbms/src/Interpreters/DDLWorker.h | 4 +- dbms/src/Interpreters/ExternalLoader.cpp | 2 +- .../src/Interpreters/InterpreterDropQuery.cpp | 11 +- dbms/src/Interpreters/InterpreterDropQuery.h | 2 - .../src/Interpreters/InterpreterSelectQuery.h | 8 +- .../Interpreters/InterpreterWatchQuery.cpp | 4 - dbms/src/Interpreters/InterpreterWatchQuery.h | 1 - .../LogicalExpressionsOptimizer.h | 2 +- dbms/src/Interpreters/SelectQueryOptions.h | 4 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Interpreters/SyntaxAnalyzer.h | 2 +- .../TranslateQualifiedNamesVisitor.h | 2 +- .../Interpreters/tests/hash_map_string.cpp | 2 +- dbms/src/Parsers/ASTWithAlias.cpp | 6 +- dbms/src/Parsers/ASTWithAlias.h | 2 - dbms/src/Parsers/ParserDropQuery.cpp | 83 ++++---- dbms/src/Parsers/ParserDropQuery.h | 4 - .../Impl/ConstantExpressionTemplate.cpp | 2 +- .../RowInputFormatWithDiagnosticInfo.cpp | 19 +- .../RowInputFormatWithDiagnosticInfo.h | 1 - dbms/src/Processors/QueryPipeline.h | 2 +- dbms/src/Processors/ResizeProcessor.cpp | 2 +- dbms/src/Storages/MergeTree/KeyCondition.h | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 27 ++- dbms/src/Storages/MergeTree/MergeTreeData.h | 5 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 10 +- .../MergeTree/MergeTreeDataMergerMutator.h | 10 +- .../MergeTreeIndexGranuleBloomFilter.cpp | 29 +-- .../MergeTreeIndexGranuleBloomFilter.h | 2 - .../Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- .../Storages/MergeTree/MergeTreeIndexSet.h | 2 +- .../MergeTree/MergeTreeRangeReader.cpp | 35 ++-- .../Storages/MergeTree/MergeTreeRangeReader.h | 1 - .../MergeTree/MergeTreeWhereOptimizer.cpp | 100 +++++----- .../MergeTree/MergeTreeWhereOptimizer.h | 4 +- dbms/src/Storages/StorageDistributed.cpp | 13 +- dbms/src/Storages/StorageDistributed.h | 1 - dbms/src/Storages/StorageMySQL.cpp | 3 +- dbms/src/Storages/StorageView.cpp | 44 ++--- dbms/src/Storages/StorageView.h | 2 - .../Storages/System/StorageSystemGraphite.cpp | 8 +- .../Storages/System/StorageSystemGraphite.h | 1 - .../Storages/System/StorageSystemNumbers.cpp | 2 +- dbms/src/TableFunctions/TableFunctionS3.cpp | 14 +- dbms/src/TableFunctions/TableFunctionS3.h | 4 +- 100 files changed, 633 insertions(+), 607 deletions(-) diff 
--git a/.clang-tidy b/.clang-tidy
index a00642e87b7..49773ad31c9 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1,2 +1,63 @@
-Checks: '-*,google-readability-avoid-underscore-in-googletest-name,misc-throw-by-value-catch-by-reference,misc-misplaced-const,misc-unconventional-assign-operator,modernize-avoid-bind,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-random-shuffle,modernize-use-bool-literals,modernize-use-nullptr,modernize-use-using,performance-faster-string-find,performance-for-range-copy,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-make-member-function-const,readability-misplaced-array-index,readability-non-const-parameter,readability-qualified-auto,readability-redundant-access-specifiers,readability-redundant-control-flow,readability-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,modernize-use-equals-default,modernize-use-equals-delete,bugprone-undelegated-constructor,readability-redundant-member-init,readability-simplify-subscript-expr,readability-simplify-boolean-expr,readability-inconsistent-declaration-parameter-name'
+Checks: '-*,
+    google-readability-avoid-underscore-in-googletest-name,
+
+    misc-throw-by-value-catch-by-reference,
+    misc-misplaced-const,
+    misc-unconventional-assign-operator,
+
+    modernize-avoid-bind,
+    modernize-loop-convert,
+    modernize-make-shared,
+    modernize-make-unique,
+    modernize-raw-string-literal,
+    modernize-redundant-void-arg,
+    modernize-replace-auto-ptr,
+    modernize-replace-random-shuffle,
+    modernize-use-bool-literals,
+    modernize-use-nullptr,
+    modernize-use-using,
+    modernize-use-equals-default,
+    modernize-use-equals-delete,
+
+    performance-faster-string-find,
+    performance-for-range-copy,
+
+    readability-avoid-const-params-in-decls,
+    readability-const-return-type,
+    readability-container-size-empty,
+    readability-convert-member-functions-to-static,
+    readability-delete-null-pointer,
+    readability-deleted-default,
+    readability-make-member-function-const,
+    readability-misplaced-array-index,
+    readability-non-const-parameter,
+    readability-qualified-auto,
+    readability-redundant-access-specifiers,
+    readability-redundant-control-flow,
+    readability-redundant-function-ptr-dereference,
+    readability-redundant-smartptr-get,
+    readability-redundant-string-cstr,
+    readability-redundant-string-init,
+    readability-static-definition-in-anonymous-namespace,
+    readability-string-compare,
+    readability-uniqueptr-delete-release,
+    readability-redundant-member-init,
+    readability-simplify-subscript-expr,
+    readability-simplify-boolean-expr,
+    readability-inconsistent-declaration-parameter-name,
+
+    bugprone-undelegated-constructor,
+    bugprone-argument-comment,
+    bugprone-bad-signal-to-kill-thread,
+    bugprone-bool-pointer-implicit-conversion,
+    bugprone-copy-constructor-init,
+    bugprone-dangling-handle,
+    bugprone-forward-declaration-namespace,
+    bugprone-fold-init-type,
+    bugprone-inaccurate-erase,
+    bugprone-incorrect-roundings,
+    bugprone-infinite-loop,
+
+    boost-use-to-string,
+'
 WarningsAsErrors: '*'
diff --git a/base/daemon/BaseDaemon.cpp
b/base/daemon/BaseDaemon.cpp index 71a11964481..41c705f41c2 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -362,19 +362,8 @@ void BaseDaemon::reloadConfiguration() } -BaseDaemon::BaseDaemon() +namespace { - checkRequiredInstructions(); -} - - -BaseDaemon::~BaseDaemon() -{ - writeSignalIDtoSignalPipe(SignalListener::StopThread); - signal_listener_thread.join(); - signal_pipe.close(); -} - enum class InstructionFail { @@ -388,7 +377,7 @@ enum class InstructionFail AVX512 = 7 }; -static std::string instructionFailToString(InstructionFail fail) +std::string instructionFailToString(InstructionFail fail) { switch (fail) { @@ -413,16 +402,16 @@ static std::string instructionFailToString(InstructionFail fail) } -static sigjmp_buf jmpbuf; +sigjmp_buf jmpbuf; -static void sigIllCheckHandler(int sig, siginfo_t * info, void * context) +void sigIllCheckHandler(int sig, siginfo_t * info, void * context) { siglongjmp(jmpbuf, 1); } /// Check if necessary sse extensions are available by trying to execute some sse instructions. /// If instruction is unavailable, SIGILL will be sent by kernel. -static void checkRequiredInstructions(volatile InstructionFail & fail) +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) { #if __SSE3__ fail = InstructionFail::SSE3; @@ -463,8 +452,9 @@ static void checkRequiredInstructions(volatile InstructionFail & fail) fail = InstructionFail::NONE; } - -void BaseDaemon::checkRequiredInstructions() +/// Check SSE and others instructions availability +/// Calls exit on fail +void checkRequiredInstructions() { struct sigaction sa{}; struct sigaction sa_old{}; @@ -487,7 +477,7 @@ void BaseDaemon::checkRequiredInstructions() exit(1); } - ::checkRequiredInstructions(fail); + checkRequiredInstructionsImpl(fail); if (sigaction(signal, &sa_old, nullptr)) { @@ -496,6 +486,22 @@ void BaseDaemon::checkRequiredInstructions() } } +} + + +BaseDaemon::BaseDaemon() +{ + checkRequiredInstructions(); +} + + +BaseDaemon::~BaseDaemon() +{ + writeSignalIDtoSignalPipe(SignalListener::StopThread); + signal_listener_thread.join(); + signal_pipe.close(); +} + void BaseDaemon::terminate() { diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index b7070c76e9b..881c711d1df 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -128,7 +128,7 @@ public: /// close all process FDs except /// 0-2 -- stdin, stdout, stderr /// also doesn't close global internal pipes for signal handling - void closeFDs(); + static void closeFDs(); protected: /// Возвращает TaskManager приложения @@ -198,12 +198,6 @@ protected: std::string config_path; DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; - -private: - - /// Check SSE and others instructions availability - /// Calls exit on fail - void checkRequiredInstructions(); }; diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 4885b08ad91..3c6a9d4bc40 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -281,7 +281,7 @@ private: } /// Should we celebrate a bit? - bool isNewYearMode() + static bool isNewYearMode() { time_t current_time = time(nullptr); @@ -294,7 +294,7 @@ private: || (now.month() == 1 && now.day() <= 5); } - bool isChineseNewYearMode(const String & local_tz) + static bool isChineseNewYearMode(const String & local_tz) { /// Days of Dec. 
20 in Chinese calendar starting from year 2019 to year 2105 static constexpr UInt16 chineseNewYearIndicators[] @@ -1594,7 +1594,7 @@ private: std::cout << "Ok." << std::endl; } - void showClientVersion() + static void showClientVersion() { std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 35c112da4d1..d59a895af41 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -499,7 +499,10 @@ ASTPtr ClusterCopier::removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast return new_query_ast; } -std::shared_ptr ClusterCopier::rewriteCreateQueryStorage(const ASTPtr & create_query_ast, const DatabaseAndTableName & new_table, const ASTPtr & new_storage_ast) + +/// Replaces ENGINE and table name in a create query +static std::shared_ptr rewriteCreateQueryStorage( + const ASTPtr & create_query_ast, const DatabaseAndTableName & new_table, const ASTPtr & new_storage_ast) { const auto & create = create_query_ast->as(); auto res = std::make_shared(create); diff --git a/dbms/programs/copier/ClusterCopier.h b/dbms/programs/copier/ClusterCopier.h index 90a003a2528..cdb06185992 100644 --- a/dbms/programs/copier/ClusterCopier.h +++ b/dbms/programs/copier/ClusterCopier.h @@ -88,11 +88,6 @@ protected: /// Removes MATERIALIZED and ALIAS columns from create table query static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr &query_ast); - /// Replaces ENGINE and table name in a create query - std::shared_ptr - rewriteCreateQueryStorage(const ASTPtr & create_query_ast, const DatabaseAndTableName & new_table, - const ASTPtr & new_storage_ast); - bool tryDropPartition(ShardPartition & task_partition, const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock); diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index eded06416f7..2d93c792350 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -118,6 +118,20 @@ void LocalServer::tryInitPath() } +static void attachSystemTables() +{ + DatabasePtr system_database = DatabaseCatalog::instance().tryGetDatabase(DatabaseCatalog::SYSTEM_DATABASE); + if (!system_database) + { + /// TODO: add attachTableDelayed into DatabaseMemory to speedup loading + system_database = std::make_shared(DatabaseCatalog::SYSTEM_DATABASE); + DatabaseCatalog::instance().attachDatabase(DatabaseCatalog::SYSTEM_DATABASE, system_database); + } + + attachSystemTablesLocal(*system_database); +} + + int LocalServer::main(const std::vector & /*args*/) try { @@ -248,20 +262,6 @@ std::string LocalServer::getInitialCreateTableQuery() } -void LocalServer::attachSystemTables() -{ - DatabasePtr system_database = DatabaseCatalog::instance().tryGetDatabase(DatabaseCatalog::SYSTEM_DATABASE); - if (!system_database) - { - /// TODO: add attachTableDelayed into DatabaseMemory to speedup loading - system_database = std::make_shared(DatabaseCatalog::SYSTEM_DATABASE); - DatabaseCatalog::instance().attachDatabase(DatabaseCatalog::SYSTEM_DATABASE, system_database); - } - - attachSystemTablesLocal(*system_database); -} - - void LocalServer::processQueries() { String initial_create_query = getInitialCreateTableQuery(); @@ -375,7 +375,7 @@ static void showClientVersion() std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< '\n'; } -std::string LocalServer::getHelpHeader() const +static std::string getHelpHeader() { return "usage: clickhouse-local [initial table definition] [--query ]\n" @@ -390,7 +390,7 @@ std::string LocalServer::getHelpHeader() const "Either through corresponding command line parameters --table --structure --input-format and --file."; } -std::string LocalServer::getHelpFooter() const +static std::string getHelpFooter() { return "Example printing memory used by each Unix user:\n" diff --git a/dbms/programs/local/LocalServer.h b/dbms/programs/local/LocalServer.h index a79ab484107..874319c1ea5 100644 --- a/dbms/programs/local/LocalServer.h +++ b/dbms/programs/local/LocalServer.h @@ -37,13 +37,9 @@ private: void tryInitPath(); void applyCmdOptions(); void applyCmdSettings(); - void attachSystemTables(); void processQueries(); void setupUsers(); - std::string getHelpHeader() const; - std::string getHelpFooter() const; - protected: std::unique_ptr context; diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 7251c0a4473..b67041f44d6 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -529,13 +529,13 @@ private: static constexpr CodePoint END = -2; - NGramHash hashContext(const CodePoint * begin, const CodePoint * end) const + static NGramHash hashContext(const CodePoint * begin, const CodePoint * end) { return CRC32Hash()(StringRef(reinterpret_cast(begin), (end - begin) * sizeof(CodePoint))); } /// By the way, we don't have to use actual Unicode numbers. We use just arbitrary bijective mapping. - CodePoint readCodePoint(const char *& pos, const char * end) + static CodePoint readCodePoint(const char *& pos, const char * end) { size_t length = UTF8::seqLength(*pos); @@ -550,7 +550,7 @@ private: return res; } - bool writeCodePoint(CodePoint code, char *& pos, const char * end) + static bool writeCodePoint(CodePoint code, char *& pos, const char * end) { size_t length = (code & 0xFF000000) ? 4 diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp index 850fd9f14c6..471a796e9f3 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.cpp +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -41,7 +41,7 @@ void ConfigPreprocessor::removeConfigurationsIf( std::vector & configs, ConfigPreprocessor::FilterType filter_type, const Strings & values, - bool leave) const + bool leave) { auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config) { diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h index 375bf9503cb..dac59405799 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.h +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -39,11 +39,11 @@ private: /// Removes configurations that has a given value. /// If leave is true, the logic is reversed. 
- void removeConfigurationsIf( + static void removeConfigurationsIf( std::vector & configs, FilterType filter_type, const Strings & values, - bool leave = false) const; + bool leave = false); const Strings paths; }; diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index ef4417f1713..0bb4f3fdb6f 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -40,7 +40,7 @@ ReportBuilder::ReportBuilder(const std::string & server_version_) { } -std::string ReportBuilder::getCurrentTime() const +static std::string getCurrentTime() { return DateLUT::instance().timeToString(time(nullptr)); } @@ -163,7 +163,7 @@ std::string ReportBuilder::buildFullReport( std::string ReportBuilder::buildCompactReport( const PerformanceTestInfo & test_info, std::vector & stats, - const std::vector & queries_to_run) const + const std::vector & queries_to_run) { FormatSettings settings; std::ostringstream output; diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h index 473ba42b728..531680a6456 100644 --- a/dbms/programs/performance-test/ReportBuilder.h +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -10,16 +10,16 @@ class ReportBuilder { public: ReportBuilder(const std::string & server_version_); + std::string buildFullReport( const PerformanceTestInfo & test_info, std::vector & stats, const std::vector & queries_to_run) const; - - std::string buildCompactReport( + static std::string buildCompactReport( const PerformanceTestInfo & test_info, std::vector & stats, - const std::vector & queries_to_run) const; + const std::vector & queries_to_run); private: std::string server_version; @@ -27,10 +27,6 @@ private: size_t num_cores; size_t num_threads; size_t ram; - -private: - std::string getCurrentTime() const; - }; } diff --git a/dbms/programs/server/HTTPHandler.h b/dbms/programs/server/HTTPHandler.h index fb6c9fb532c..681a3cce932 100644 --- a/dbms/programs/server/HTTPHandler.h +++ b/dbms/programs/server/HTTPHandler.h @@ -77,7 +77,7 @@ private: Poco::Net::HTTPServerResponse & response, Output & used_output); - void pushDelayedResults(Output & used_output); + static void pushDelayedResults(Output & used_output); }; } diff --git a/dbms/src/Access/AccessRights.cpp b/dbms/src/Access/AccessRights.cpp index 5b66538e876..4f92d8c31c9 100644 --- a/dbms/src/Access/AccessRights.cpp +++ b/dbms/src/Access/AccessRights.cpp @@ -158,7 +158,7 @@ public: void revoke(const AccessFlags & access_to_revoke, const Helper & helper) { if constexpr (mode == NORMAL_REVOKE_MODE) - { + { // NOLINT if (level == TABLE_LEVEL) removeExplicitGrantsRec(access_to_revoke); else @@ -166,11 +166,12 @@ public: } else if constexpr (mode == PARTIAL_REVOKE_MODE) { - AccessFlags new_partial_revokes = access_to_revoke - explicit_grants; if (level == TABLE_LEVEL) removeExplicitGrantsRec(access_to_revoke); else removeExplicitGrants(access_to_revoke); + + AccessFlags new_partial_revokes = access_to_revoke - explicit_grants; removePartialRevokesRec(new_partial_revokes); partial_revokes |= new_partial_revokes; } diff --git a/dbms/src/Access/IAccessStorage.cpp b/dbms/src/Access/IAccessStorage.cpp index 236f1325beb..3dfc3e232ba 100644 --- a/dbms/src/Access/IAccessStorage.cpp +++ b/dbms/src/Access/IAccessStorage.cpp @@ -379,7 +379,7 @@ void IAccessStorage::throwNotFound(std::type_index type, const String & name) co } -void IAccessStorage::throwBadCast(const UUID & id, std::type_index type, 
const String & name, std::type_index required_type) const +void IAccessStorage::throwBadCast(const UUID & id, std::type_index type, const String & name, std::type_index required_type) { throw Exception( "ID {" + toString(id) + "}: " + getTypeName(type) + backQuote(name) + " expected to be of type " + getTypeName(required_type), diff --git a/dbms/src/Access/IAccessStorage.h b/dbms/src/Access/IAccessStorage.h index 505c635aac8..30a1a6bdc32 100644 --- a/dbms/src/Access/IAccessStorage.h +++ b/dbms/src/Access/IAccessStorage.h @@ -151,8 +151,9 @@ protected: static String getTypeName(std::type_index type) { return IAccessEntity::getTypeName(type); } [[noreturn]] void throwNotFound(const UUID & id) const; [[noreturn]] void throwNotFound(std::type_index type, const String & name) const; - [[noreturn]] void throwBadCast(const UUID & id, std::type_index type, const String & name, std::type_index required_type) const; - [[noreturn]] void throwIDCollisionCannotInsert(const UUID & id, std::type_index type, const String & name, std::type_index existing_type, const String & existing_name) const; + [[noreturn]] static void throwBadCast(const UUID & id, std::type_index type, const String & name, std::type_index required_type); + [[noreturn]] void throwIDCollisionCannotInsert( + const UUID & id, std::type_index type, const String & name, std::type_index existing_type, const String & existing_name) const; [[noreturn]] void throwNameCollisionCannotInsert(std::type_index type, const String & name) const; [[noreturn]] void throwNameCollisionCannotRename(std::type_index type, const String & old_name, const String & new_name) const; [[noreturn]] void throwReadonlyCannotInsert(std::type_index type, const String & name) const; diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 507ff6c2db9..7ea2a3f9dfe 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -576,8 +576,9 @@ ColumnAggregateFunction::MutablePtr ColumnAggregateFunction::createView() const } ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction & src_) - : foreign_arenas(concatArenas(src_.foreign_arenas, src_.my_arena)), - func(src_.func), src(src_.getPtr()), data(src_.data.begin(), src_.data.end()) + : COWHelper(src_), + foreign_arenas(concatArenas(src_.foreign_arenas, src_.my_arena)), + func(src_.func), src(src_.getPtr()), data(src_.data.begin(), src_.data.end()) { } diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index 23d43579bbd..aaec56f37cc 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -16,6 +16,12 @@ namespace ErrorCodes namespace { + void checkColumn(const IColumn & column) + { + if (!dynamic_cast(&column)) + throw Exception("ColumnUnique expected as an argument of ColumnLowCardinality.", ErrorCodes::ILLEGAL_COLUMN); + } + template PaddedPODArray * getIndexesData(IColumn & indexes) { @@ -651,13 +657,6 @@ ColumnLowCardinality::Dictionary::Dictionary(ColumnPtr column_unique_, bool is_s checkColumn(*column_unique); } -void ColumnLowCardinality::Dictionary::checkColumn(const IColumn & column) -{ - - if (!dynamic_cast(&column)) - throw Exception("ColumnUnique expected as an argument of ColumnLowCardinality.", ErrorCodes::ILLEGAL_COLUMN); -} - void ColumnLowCardinality::Dictionary::setShared(const ColumnPtr & column_unique_) { checkColumn(*column_unique_); diff --git 
a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index 621fffb4a19..a76bd98de17 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -275,8 +275,6 @@ private: private: WrappedPtr column_unique; bool shared = false; - - void checkColumn(const IColumn & column); }; Dictionary dictionary; diff --git a/dbms/src/Common/Config/ConfigProcessor.cpp b/dbms/src/Common/Config/ConfigProcessor.cpp index ce96bb70ddc..0213e2abe90 100644 --- a/dbms/src/Common/Config/ConfigProcessor.cpp +++ b/dbms/src/Common/Config/ConfigProcessor.cpp @@ -219,7 +219,7 @@ void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with) mergeRecursive(config, config_root, with_root); } -std::string ConfigProcessor::layerFromHost() +static std::string layerFromHost() { utsname buf; if (uname(&buf)) diff --git a/dbms/src/Common/Config/ConfigProcessor.h b/dbms/src/Common/Config/ConfigProcessor.h index ae88234f077..b6f772f8c16 100644 --- a/dbms/src/Common/Config/ConfigProcessor.h +++ b/dbms/src/Common/Config/ConfigProcessor.h @@ -97,8 +97,8 @@ public: /// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/ void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir); - /// Set path of main config.xml . It will be cutted from all configs placed to preprocessed_configs/ - void setConfigPath(const std::string & config_path); + /// Set path of main config.xml. It will be cutted from all configs placed to preprocessed_configs/ + static void setConfigPath(const std::string & config_path); public: using Files = std::vector; @@ -131,8 +131,6 @@ private: void merge(XMLDocumentPtr config, XMLDocumentPtr with); - std::string layerFromHost(); - void doIncludesRecursive( XMLDocumentPtr config, XMLDocumentPtr include_from, diff --git a/dbms/src/Common/UTF8Helpers.cpp b/dbms/src/Common/UTF8Helpers.cpp index c18cce6df4e..ff799315d3b 100644 --- a/dbms/src/Common/UTF8Helpers.cpp +++ b/dbms/src/Common/UTF8Helpers.cpp @@ -75,15 +75,10 @@ static int wcwidth(wchar_t wc) switch (width) { case widechar_nonprint: - [[fallthrough]]; case widechar_combining: - [[fallthrough]]; case widechar_unassigned: - return 0; case widechar_ambiguous: - [[fallthrough]]; case widechar_private_use: - [[fallthrough]]; case widechar_widened_in_9: return 1; default: diff --git a/dbms/src/Compression/tests/gtest_compressionCodec.cpp b/dbms/src/Compression/tests/gtest_compressionCodec.cpp index e3b226c302d..eb3c98b5ba1 100644 --- a/dbms/src/Compression/tests/gtest_compressionCodec.cpp +++ b/dbms/src/Compression/tests/gtest_compressionCodec.cpp @@ -519,7 +519,7 @@ public: CODEC_WITHOUT_DATA_TYPE, }; - CompressionCodecPtr makeCodec(MakeCodecParam with_data_type) + static CompressionCodecPtr makeCodec(MakeCodecParam with_data_type) { const auto & codec_string = std::get<0>(GetParam()).codec_statement; const auto & data_type = with_data_type == CODEC_WITH_DATA_TYPE ? 
std::get<1>(GetParam()).data_type : nullptr; @@ -527,7 +527,7 @@ public: return ::makeCodec(codec_string, data_type); } - void testTranscoding(ICompressionCodec & codec) + static void testTranscoding(ICompressionCodec & codec) { NoOpTimer timer; ::testTranscoding(timer, codec, std::get<1>(GetParam()), std::get<0>(GetParam()).expected_compression_ratio); diff --git a/dbms/src/Core/SettingsCollection.cpp b/dbms/src/Core/SettingsCollection.cpp index f2ac331b028..b830c35b81c 100644 --- a/dbms/src/Core/SettingsCollection.cpp +++ b/dbms/src/Core/SettingsCollection.cpp @@ -208,7 +208,7 @@ void SettingMaxThreads::setAuto() is_auto = true; } -UInt64 SettingMaxThreads::getAutoValue() const +UInt64 SettingMaxThreads::getAutoValue() { static auto res = getNumberOfPhysicalCPUCores(); return res; diff --git a/dbms/src/Core/SettingsCollection.h b/dbms/src/Core/SettingsCollection.h index 700e96f0d40..466b80b1fd3 100644 --- a/dbms/src/Core/SettingsCollection.h +++ b/dbms/src/Core/SettingsCollection.h @@ -91,7 +91,7 @@ struct SettingMaxThreads void deserialize(ReadBuffer & buf, SettingsBinaryFormat format); void setAuto(); - UInt64 getAutoValue() const; + static UInt64 getAutoValue(); }; diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index d685b0225e6..10b0d0a7fd1 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -32,6 +32,101 @@ namespace ErrorCodes } +static void checkCalculated(const ColumnWithTypeAndName & col_read, + const ColumnWithTypeAndName & col_defaults, + size_t defaults_needed) +{ + size_t column_size = col_read.column->size(); + + if (column_size != col_defaults.column->size()) + throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + if (column_size < defaults_needed) + throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + if (!col_read.type->equals(*col_defaults.type)) + throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH); +} + +static void mixNumberColumns( + TypeIndex type_idx, + MutableColumnPtr & column_mixed, + const ColumnPtr & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) +{ + auto call = [&](const auto & types) -> bool + { + using Types = std::decay_t; + using DataType = typename Types::LeftType; + + if constexpr (!std::is_same_v && !std::is_same_v) + { + using FieldType = typename DataType::FieldType; + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + auto col_read = typeid_cast(column_mixed.get()); + if (!col_read) + return false; + + typename ColVecType::Container & dst = col_read->getData(); + + if (auto const_col_defs = checkAndGetColumnConst(col_defaults.get())) + { + FieldType value = checkAndGetColumn(const_col_defs->getDataColumnPtr().get())->getData()[0]; + + for (size_t i = 0; i < defaults_mask.size(); ++i) + if (defaults_mask[i]) + dst[i] = value; + + return true; + } + else if (auto col_defs = checkAndGetColumn(col_defaults.get())) + { + auto & src = col_defs->getData(); + for (size_t i = 0; i < defaults_mask.size(); ++i) + if (defaults_mask[i]) + dst[i] = src[i]; + + return true; + } + } + + return false; + }; + + if (!callOnIndexAndDataType(type_idx, call)) + throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR); +} + +static MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, + const 
ColumnWithTypeAndName & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) +{ + size_t column_size = col_read.column->size(); + size_t defaults_needed = defaults_mask.size(); + + MutableColumnPtr column_mixed = col_read.column->cloneEmpty(); + + for (size_t i = 0; i < defaults_needed; ++i) + { + if (defaults_mask[i]) + { + if (isColumnConst(*col_defaults.column)) + column_mixed->insert((*col_defaults.column)[i]); + else + column_mixed->insertFrom(*col_defaults.column, i); + } + else + column_mixed->insertFrom(*col_read.column, i); + } + + for (size_t i = defaults_needed; i < column_size; ++i) + column_mixed->insertFrom(*col_read.column, i); + + return column_mixed; +} + + AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(const BlockInputStreamPtr & input, const ColumnDefaults & column_defaults_, const Context & context_) @@ -124,95 +219,4 @@ Block AddingDefaultsBlockInputStream::readImpl() return res; } -void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName & col_read, - const ColumnWithTypeAndName & col_defaults, - size_t defaults_needed) const -{ - size_t column_size = col_read.column->size(); - - if (column_size != col_defaults.column->size()) - throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - if (column_size < defaults_needed) - throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - if (!col_read.type->equals(*col_defaults.type)) - throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH); -} - -void AddingDefaultsBlockInputStream::mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & column_mixed, const ColumnPtr & col_defaults, - const BlockMissingValues::RowsBitMask & defaults_mask) const -{ - auto call = [&](const auto & types) -> bool - { - using Types = std::decay_t; - using DataType = typename Types::LeftType; - - if constexpr (!std::is_same_v && !std::is_same_v) - { - using FieldType = typename DataType::FieldType; - using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; - - auto col_read = typeid_cast(column_mixed.get()); - if (!col_read) - return false; - - typename ColVecType::Container & dst = col_read->getData(); - - if (auto const_col_defs = checkAndGetColumnConst(col_defaults.get())) - { - FieldType value = checkAndGetColumn(const_col_defs->getDataColumnPtr().get())->getData()[0]; - - for (size_t i = 0; i < defaults_mask.size(); ++i) - if (defaults_mask[i]) - dst[i] = value; - - return true; - } - else if (auto col_defs = checkAndGetColumn(col_defaults.get())) - { - auto & src = col_defs->getData(); - for (size_t i = 0; i < defaults_mask.size(); ++i) - if (defaults_mask[i]) - dst[i] = src[i]; - - return true; - } - } - - return false; - }; - - if (!callOnIndexAndDataType(type_idx, call)) - throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR); -} - -MutableColumnPtr AddingDefaultsBlockInputStream::mixColumns(const ColumnWithTypeAndName & col_read, - const ColumnWithTypeAndName & col_defaults, - const BlockMissingValues::RowsBitMask & defaults_mask) const -{ - size_t column_size = col_read.column->size(); - size_t defaults_needed = defaults_mask.size(); - - MutableColumnPtr column_mixed = col_read.column->cloneEmpty(); - - for (size_t i = 0; i < defaults_needed; ++i) - { - if (defaults_mask[i]) - { - if (isColumnConst(*col_defaults.column)) - column_mixed->insert((*col_defaults.column)[i]); - else - 
column_mixed->insertFrom(*col_defaults.column, i); - } - else - column_mixed->insertFrom(*col_read.column, i); - } - - for (size_t i = defaults_needed; i < column_size; ++i) - column_mixed->insertFrom(*col_read.column, i); - - return column_mixed; -} - } diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h index 0d6f36861a4..cf16ec3fb75 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h @@ -27,12 +27,6 @@ private: Block header; const ColumnDefaults column_defaults; const Context & context; - - void checkCalculated(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, size_t needed) const; - MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, - const BlockMissingValues::RowsBitMask & defaults_mask) const; - void mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & column_mixed, const ColumnPtr & col_defaults, - const BlockMissingValues::RowsBitMask & defaults_mask) const; }; } diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp index fbb1622326e..991df4dda7f 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp @@ -37,6 +37,89 @@ namespace return false; } + + /// Returns true if merge result is not empty + bool mergeMap(const SummingSortedBlockInputStream::MapDescription & desc, Row & row, SortCursor & cursor) + { + /// Strongly non-optimal. + + Row & left = row; + Row right(left.size()); + + for (size_t col_num : desc.key_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + for (size_t col_num : desc.val_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & + { + return matrix[i].get()[j]; + }; + + auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array + { + size_t size = col_nums.size(); + Array res(size); + for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) + res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); + return res; + }; + + std::map merged; + + auto accumulate = [](Array & dst, const Array & src) + { + bool has_non_zero = false; + size_t size = dst.size(); + for (size_t i = 0; i < size; ++i) + if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) + has_non_zero = true; + return has_non_zero; + }; + + auto merge = [&](const Row & matrix) + { + size_t rows = matrix[desc.key_col_nums[0]].get().size(); + + for (size_t j = 0; j < rows; ++j) + { + Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); + Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); + + auto it = merged.find(key); + if (merged.end() == it) + merged.emplace(std::move(key), std::move(value)); + else + { + if (!accumulate(it->second, value)) + merged.erase(it); + } + } + }; + + merge(left); + merge(right); + + for (size_t col_num : desc.key_col_nums) + row[col_num] = Array(merged.size()); + for (size_t col_num : desc.val_col_nums) + row[col_num] = Array(merged.size()); + + size_t row_num = 0; + for (const auto & key_value : merged) + { + for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; 
++col_num_index) + row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + + for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + + ++row_num; + } + + return row_num != 0; + } } @@ -401,89 +484,6 @@ void SummingSortedBlockInputStream::merge(MutableColumns & merged_columns, Sorti } -bool SummingSortedBlockInputStream::mergeMap(const MapDescription & desc, Row & row, SortCursor & cursor) -{ - /// Strongly non-optimal. - - Row & left = row; - Row right(left.size()); - - for (size_t col_num : desc.key_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); - - for (size_t col_num : desc.val_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); - - auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & - { - return matrix[i].get()[j]; - }; - - auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array - { - size_t size = col_nums.size(); - Array res(size); - for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) - res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); - return res; - }; - - std::map merged; - - auto accumulate = [](Array & dst, const Array & src) - { - bool has_non_zero = false; - size_t size = dst.size(); - for (size_t i = 0; i < size; ++i) - if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) - has_non_zero = true; - return has_non_zero; - }; - - auto merge = [&](const Row & matrix) - { - size_t rows = matrix[desc.key_col_nums[0]].get().size(); - - for (size_t j = 0; j < rows; ++j) - { - Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); - Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); - - auto it = merged.find(key); - if (merged.end() == it) - merged.emplace(std::move(key), std::move(value)); - else - { - if (!accumulate(it->second, value)) - merged.erase(it); - } - } - }; - - merge(left); - merge(right); - - for (size_t col_num : desc.key_col_nums) - row[col_num] = Array(merged.size()); - for (size_t col_num : desc.val_col_nums) - row[col_num] = Array(merged.size()); - - size_t row_num = 0; - for (const auto & key_value : merged) - { - for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; - - for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; - - ++row_num; - } - - return row_num != 0; -} - - void SummingSortedBlockInputStream::addRow(SortCursor & cursor) { for (auto & desc : columns_to_aggregate) diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.h b/dbms/src/DataStreams/SummingSortedBlockInputStream.h index bdd68c991cc..bd5ab3c8165 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.h @@ -35,6 +35,13 @@ public: String getName() const override { return "SummingSorted"; } + /// Stores numbers of key-columns and value-columns. + struct MapDescription + { + std::vector key_col_nums; + std::vector val_col_nums; + }; + protected: /// Can return 1 more records than max_block_size. 
Block readImpl() override; @@ -120,13 +127,6 @@ private: AggregateDescription(const AggregateDescription &) = delete; }; - /// Stores numbers of key-columns and value-columns. - struct MapDescription - { - std::vector key_col_nums; - std::vector val_col_nums; - }; - std::vector columns_to_aggregate; std::vector maps_to_sum; @@ -146,9 +146,6 @@ private: /// Insert the summed row for the current group into the result and updates some of per-block flags if the row is not "zero". void insertCurrentRowIfNeeded(MutableColumns & merged_columns); - /// Returns true if merge result is not empty - bool mergeMap(const MapDescription & desc, Row & row, SortCursor & cursor); - // Add the row under the cursor to the `row`. void addRow(SortCursor & cursor); }; diff --git a/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h b/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h index 3231a92b724..d09e69cf561 100644 --- a/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h +++ b/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h @@ -10,7 +10,6 @@ class GeoDictionariesLoader { public: - std::unique_ptr reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config); - - std::unique_ptr reloadRegionsNames(const Poco::Util::AbstractConfiguration & config); + static std::unique_ptr reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config); + static std::unique_ptr reloadRegionsNames(const Poco::Util::AbstractConfiguration & config); }; diff --git a/dbms/src/Dictionaries/TrieDictionary.cpp b/dbms/src/Dictionaries/TrieDictionary.cpp index dcefc873b4f..d5ab7dc31ce 100644 --- a/dbms/src/Dictionaries/TrieDictionary.cpp +++ b/dbms/src/Dictionaries/TrieDictionary.cpp @@ -34,6 +34,18 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +static void validateKeyTypes(const DataTypes & key_types) +{ + if (key_types.size() != 1) + throw Exception{"Expected a single IP address", ErrorCodes::TYPE_MISMATCH}; + + const auto & actual_type = key_types[0]->getName(); + + if (actual_type != "UInt32" && actual_type != "FixedString(16)") + throw Exception{"Key does not match, expected either UInt32 or FixedString(16)", ErrorCodes::TYPE_MISMATCH}; +} + + TrieDictionary::TrieDictionary( const std::string & database_, const std::string & name_, @@ -416,17 +428,6 @@ void TrieDictionary::calculateBytesAllocated() bytes_allocated += btrie_allocated(trie); } -void TrieDictionary::validateKeyTypes(const DataTypes & key_types) const -{ - if (key_types.size() != 1) - throw Exception{"Expected a single IP address", ErrorCodes::TYPE_MISMATCH}; - - const auto & actual_type = key_types[0]->getName(); - - if (actual_type != "UInt32" && actual_type != "FixedString(16)") - throw Exception{"Key does not match, expected either UInt32 or FixedString(16)", ErrorCodes::TYPE_MISMATCH}; -} - template void TrieDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) diff --git a/dbms/src/Dictionaries/TrieDictionary.h b/dbms/src/Dictionaries/TrieDictionary.h index 81f5a02a00b..59f946ebe71 100644 --- a/dbms/src/Dictionaries/TrieDictionary.h +++ b/dbms/src/Dictionaries/TrieDictionary.h @@ -207,8 +207,6 @@ private: void calculateBytesAllocated(); - void validateKeyTypes(const DataTypes & key_types) const; - template void createAttributeImpl(Attribute & attribute, const Field & null_value); diff --git a/dbms/src/Disks/DiskS3.cpp b/dbms/src/Disks/DiskS3.cpp index b610a1fc42f..6b98520637a 100644 --- a/dbms/src/Disks/DiskS3.cpp +++ b/dbms/src/Disks/DiskS3.cpp @@ -34,6 +34,15 @@ namespace 
ErrorCodes namespace { + String getRandomName() + { + std::uniform_int_distribution distribution('a', 'z'); + String res(32, ' '); /// The number of bits of entropy should be not less than 128. + for (auto & c : res) + c = distribution(thread_local_rng); + return res; + } + template void throwIfError(Aws::Utils::Outcome && response) { @@ -570,14 +579,6 @@ void DiskS3::removeRecursive(const String & path) } } -String DiskS3::getRandomName() const -{ - std::uniform_int_distribution distribution('a', 'z'); - String res(32, ' '); /// The number of bits of entropy should be not less than 128. - for (auto & c : res) - c = distribution(thread_local_rng); - return res; -} bool DiskS3::tryReserve(UInt64 bytes) { diff --git a/dbms/src/Disks/DiskS3.h b/dbms/src/Disks/DiskS3.h index 5b59089ffd1..d2950940063 100644 --- a/dbms/src/Disks/DiskS3.h +++ b/dbms/src/Disks/DiskS3.h @@ -92,8 +92,6 @@ public: Poco::Timestamp getLastModified(const String & path) override; private: - String getRandomName() const; - bool tryReserve(UInt64 bytes); private: diff --git a/dbms/src/Functions/URL/URLHierarchy.cpp b/dbms/src/Functions/URL/URLHierarchy.cpp index 0ed7a11f41c..776b70c23cf 100644 --- a/dbms/src/Functions/URL/URLHierarchy.cpp +++ b/dbms/src/Functions/URL/URLHierarchy.cpp @@ -32,7 +32,7 @@ public: void init(Block & /*block*/, const ColumnNumbers & /*arguments*/) {} /// Returns the position of the argument that is the column of rows - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } diff --git a/dbms/src/Functions/URL/URLPathHierarchy.cpp b/dbms/src/Functions/URL/URLPathHierarchy.cpp index 5bea296a9ca..d695877b1c5 100644 --- a/dbms/src/Functions/URL/URLPathHierarchy.cpp +++ b/dbms/src/Functions/URL/URLPathHierarchy.cpp @@ -31,7 +31,7 @@ public: void init(Block & /*block*/, const ColumnNumbers & /*arguments*/) {} /// Returns the position of the argument that is the column of rows - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } diff --git a/dbms/src/Functions/URL/extractURLParameterNames.cpp b/dbms/src/Functions/URL/extractURLParameterNames.cpp index aa9b87d69f6..0f7966fadeb 100644 --- a/dbms/src/Functions/URL/extractURLParameterNames.cpp +++ b/dbms/src/Functions/URL/extractURLParameterNames.cpp @@ -29,7 +29,7 @@ public: } /// Returns the position of the argument that is the column of rows - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } diff --git a/dbms/src/Functions/URL/extractURLParameters.cpp b/dbms/src/Functions/URL/extractURLParameters.cpp index ab0aef2c039..1c1a155fd65 100644 --- a/dbms/src/Functions/URL/extractURLParameters.cpp +++ b/dbms/src/Functions/URL/extractURLParameters.cpp @@ -31,7 +31,7 @@ public: void init(Block & /*block*/, const ColumnNumbers & /*arguments*/) {} /// Returns the position of the argument that is the column of rows - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } diff --git a/dbms/src/Functions/array/arrayDistinct.cpp b/dbms/src/Functions/array/arrayDistinct.cpp index 920ee6d39fd..d24de638865 100644 --- a/dbms/src/Functions/array/arrayDistinct.cpp +++ b/dbms/src/Functions/array/arrayDistinct.cpp @@ -62,21 +62,21 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber( + static bool executeNumber( const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, const ColumnNullable * 
nullable_col); - bool executeString( + static bool executeString( const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, const ColumnNullable * nullable_col); - void executeHashed( + static void executeHashed( const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, diff --git a/dbms/src/Functions/array/arrayElement.cpp b/dbms/src/Functions/array/arrayElement.cpp index 2921a4bd02a..7c610017b29 100644 --- a/dbms/src/Functions/array/arrayElement.cpp +++ b/dbms/src/Functions/array/arrayElement.cpp @@ -53,29 +53,29 @@ private: ArrayImpl::NullMapBuilder & builder, size_t input_rows_count); template - bool executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, + static bool executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, ArrayImpl::NullMapBuilder & builder); template - bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, + static bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); - bool executeStringConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, + static bool executeStringConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, ArrayImpl::NullMapBuilder & builder); template - bool executeString(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, + static bool executeString(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); - bool executeGenericConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, + static bool executeGenericConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, ArrayImpl::NullMapBuilder & builder); template - bool executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, + static bool executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); template - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, + static bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder, size_t input_rows_count); diff --git a/dbms/src/Functions/array/arrayReverse.cpp b/dbms/src/Functions/array/arrayReverse.cpp index 81ddb8e8e5e..8913b27db4c 100644 --- a/dbms/src/Functions/array/arrayReverse.cpp +++ b/dbms/src/Functions/array/arrayReverse.cpp @@ -45,11 +45,11 @@ public: private: template - bool executeNumber(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data); + static bool executeNumber(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data); - bool executeFixedString(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data); - bool executeString(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data); - bool executeGeneric(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data); + static bool executeFixedString(const IColumn & 
src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data); + static bool executeString(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data); + static bool executeGeneric(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data); }; diff --git a/dbms/src/Functions/blockSerializedSize.cpp b/dbms/src/Functions/blockSerializedSize.cpp index 0a13e8ab013..0e8833d795f 100644 --- a/dbms/src/Functions/blockSerializedSize.cpp +++ b/dbms/src/Functions/blockSerializedSize.cpp @@ -39,7 +39,7 @@ public: input_rows_count, size)->convertToFullColumnIfConst(); } - UInt64 blockSerializedSizeOne(const ColumnWithTypeAndName & elem) const + static UInt64 blockSerializedSizeOne(const ColumnWithTypeAndName & elem) { ColumnPtr full_column = elem.column->convertToFullColumnIfConst(); diff --git a/dbms/src/Functions/convertCharset.cpp b/dbms/src/Functions/convertCharset.cpp index 00ccc8ae2c0..372bf08a0d2 100644 --- a/dbms/src/Functions/convertCharset.cpp +++ b/dbms/src/Functions/convertCharset.cpp @@ -78,13 +78,13 @@ private: /// Separate converter is created for each thread. using Pool = ObjectPoolMap; - Pool::Pointer getConverter(const String & charset) + static Pool::Pointer getConverter(const String & charset) { static Pool pool; return pool.get(charset, [&charset] { return new Converter(charset); }); } - void convert(const String & from_charset, const String & to_charset, + static void convert(const String & from_charset, const String & to_charset, const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets, ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets) { diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 7ae1042c22c..220f1505bcd 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -416,7 +416,7 @@ private: return true; } - bool executeString(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) + static bool executeString(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) { const IColumn * col_then_untyped = block.getByPosition(arguments[1]).column.get(); const IColumn * col_else_untyped = block.getByPosition(arguments[2]).column.get(); @@ -494,7 +494,7 @@ private: return false; } - bool executeGenericArray(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) + static bool executeGenericArray(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) { /// For generic implementation, arrays must be of same type. 
if (!block.getByPosition(arguments[1]).type->equals(*block.getByPosition(arguments[2]).type)) diff --git a/dbms/src/Functions/reinterpretAsFixedString.cpp b/dbms/src/Functions/reinterpretAsFixedString.cpp index 3be84c7810a..b56d97708d2 100644 --- a/dbms/src/Functions/reinterpretAsFixedString.cpp +++ b/dbms/src/Functions/reinterpretAsFixedString.cpp @@ -39,7 +39,7 @@ public: throw Exception("Cannot reinterpret " + type.getName() + " as FixedString because it is not fixed size and contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) + static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) { size_t rows = src.size(); ColumnFixedString::Chars & data_to = dst.getChars(); @@ -54,7 +54,7 @@ public: } } - void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) + static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) { size_t rows = src.size(); ColumnFixedString::Chars & data_to = dst.getChars(); diff --git a/dbms/src/Functions/reinterpretAsString.cpp b/dbms/src/Functions/reinterpretAsString.cpp index 09ae1a20dee..b86c9b14c5a 100644 --- a/dbms/src/Functions/reinterpretAsString.cpp +++ b/dbms/src/Functions/reinterpretAsString.cpp @@ -39,7 +39,7 @@ public: throw Exception("Cannot reinterpret " + type.getName() + " as String because it is not contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - void executeToString(const IColumn & src, ColumnString & dst) + static void executeToString(const IColumn & src, ColumnString & dst) { size_t rows = src.size(); ColumnString::Chars & data_to = dst.getChars(); diff --git a/dbms/src/Functions/transform.cpp b/dbms/src/Functions/transform.cpp index 42111899320..0ad3c6874bd 100644 --- a/dbms/src/Functions/transform.cpp +++ b/dbms/src/Functions/transform.cpp @@ -188,7 +188,7 @@ public: } private: - void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) + static void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { /// Materialize the input column and compute the function as usual. diff --git a/dbms/src/IO/S3Common.cpp b/dbms/src/IO/S3Common.cpp index f41d5c3b253..700901ebc02 100644 --- a/dbms/src/IO/S3Common.cpp +++ b/dbms/src/IO/S3Common.cpp @@ -84,7 +84,7 @@ namespace S3 return ret; } - std::shared_ptr ClientFactory::create( + std::shared_ptr ClientFactory::create( // NOLINT const String & endpoint, const String & access_key_id, const String & secret_access_key) diff --git a/dbms/src/IO/WriteBufferValidUTF8.cpp b/dbms/src/IO/WriteBufferValidUTF8.cpp index b40424fc463..0e04aa4c22d 100644 --- a/dbms/src/IO/WriteBufferValidUTF8.cpp +++ b/dbms/src/IO/WriteBufferValidUTF8.cpp @@ -81,7 +81,7 @@ void WriteBufferValidUTF8::nextImpl() size_t len = length_of_utf8_sequence[static_cast(*p)]; if (len > 4) - { + { // NOLINT /// Invalid start of sequence. Skip one byte. 
putValid(valid_start, p - valid_start); putReplacement(); diff --git a/dbms/src/IO/parseDateTimeBestEffort.cpp b/dbms/src/IO/parseDateTimeBestEffort.cpp index 59fb39ed846..24d05f73aa0 100644 --- a/dbms/src/IO/parseDateTimeBestEffort.cpp +++ b/dbms/src/IO/parseDateTimeBestEffort.cpp @@ -479,12 +479,12 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date if (read_alpha_month(alpha)) { } - else if (0 == strncasecmp(alpha, "UTC", 3)) has_time_zone_offset = true; + else if (0 == strncasecmp(alpha, "UTC", 3)) has_time_zone_offset = true; // NOLINT else if (0 == strncasecmp(alpha, "GMT", 3)) has_time_zone_offset = true; else if (0 == strncasecmp(alpha, "MSK", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 3; } else if (0 == strncasecmp(alpha, "MSD", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 4; } - else if (0 == strncasecmp(alpha, "Mon", 3)) has_day_of_week = true; + else if (0 == strncasecmp(alpha, "Mon", 3)) has_day_of_week = true; // NOLINT else if (0 == strncasecmp(alpha, "Tue", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Wed", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Thu", 3)) has_day_of_week = true; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 30d2afdb2d8..0ab4949371b 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -510,7 +510,7 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t rows, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) const + Arena * arena) { /// Adding values for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 0fa9bd509bd..bf6fb88ca64 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1082,11 +1082,11 @@ protected: AggregateFunctionInstruction * aggregate_instructions) const; /// For case when there are no keys (all aggregate into one row). - void executeWithoutKeyImpl( + static void executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t rows, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) const; + Arena * arena); template void writeToTemporaryFileImpl( diff --git a/dbms/src/Interpreters/CatBoostModel.cpp b/dbms/src/Interpreters/CatBoostModel.cpp index 552905cfd5a..3d365ab3927 100644 --- a/dbms/src/Interpreters/CatBoostModel.cpp +++ b/dbms/src/Interpreters/CatBoostModel.cpp @@ -218,7 +218,7 @@ private: /// Buffer should be allocated with features_count * column->size() elements. /// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[size * features_count] - void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count) const + static void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count) { size_t size = column.size(); for (size_t i = 0; i < size; ++i) @@ -231,8 +231,8 @@ private: /// Buffer should be allocated with features_count * column->size() elements. /// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[size * features_count] /// Returns PODArray which holds data (because ColumnFixedString doesn't store terminating zero). 
- PODArray placeFixedStringColumn( - const ColumnFixedString & column, const char ** buffer, size_t features_count) const + static PODArray placeFixedStringColumn( + const ColumnFixedString & column, const char ** buffer, size_t features_count) { size_t size = column.size(); size_t str_size = column.getN(); @@ -281,8 +281,8 @@ private: /// Place columns into buffer, returns data which was used for fixed string columns. /// Buffer should contains column->size() values, each value contains size strings. - std::vector> placeStringColumns( - const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer) const + static std::vector> placeStringColumns( + const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer) { if (size == 0) return {}; diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 8dafdbb4e37..a10b953e644 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -668,7 +668,7 @@ void DDLWorker::processTask(DDLTask & task, const ZooKeeperPtr & zookeeper) } -bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const StoragePtr storage) const +bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const StoragePtr storage) { /// Pure DROP queries have to be executed on each node separately if (auto query = ast_ddl->as(); query && query->kind != ASTDropQuery::Kind::Truncate) diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h index 39ae768d7d8..32b7cd5f172 100644 --- a/dbms/src/Interpreters/DDLWorker.h +++ b/dbms/src/Interpreters/DDLWorker.h @@ -59,7 +59,7 @@ private: void processTask(DDLTask & task, const ZooKeeperPtr & zookeeper); /// Check that query should be executed on leader replica only - bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage) const; + static bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage); /// Check that shard has consistent config with table void checkShardConfig(const String & table, const DDLTask & task, StoragePtr storage) const; @@ -84,7 +84,7 @@ private: void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper); /// Init task node - void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper); + static void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper); void runMainThread(); diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5a012bfb5a2..60467b6eb4f 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -868,7 +868,7 @@ private: } } - void cancelLoading(Info & info) + static void cancelLoading(Info & info) { if (!info.is_loading()) return; diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index bb62d704d2d..12a39e5aac4 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -25,6 +25,12 @@ namespace ErrorCodes } +static DatabasePtr tryGetDatabase(const String & database_name, bool if_exists) +{ + return if_exists ? 
DatabaseCatalog::instance().tryGetDatabase(database_name) : DatabaseCatalog::instance().getDatabase(database_name); +} + + InterpreterDropQuery::InterpreterDropQuery(const ASTPtr & query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} @@ -227,6 +233,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, return {}; } + BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists) { auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); @@ -263,10 +270,6 @@ BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, AS return {}; } -DatabasePtr InterpreterDropQuery::tryGetDatabase(const String & database_name, bool if_exists) -{ - return if_exists ? DatabaseCatalog::instance().tryGetDatabase(database_name) : DatabaseCatalog::instance().getDatabase(database_name); -} DatabaseAndTable InterpreterDropQuery::tryGetDatabaseAndTable(const String & database_name, const String & table_name, bool if_exists) { diff --git a/dbms/src/Interpreters/InterpreterDropQuery.h b/dbms/src/Interpreters/InterpreterDropQuery.h index 6a9c249973e..fd6f369e876 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.h +++ b/dbms/src/Interpreters/InterpreterDropQuery.h @@ -34,8 +34,6 @@ private: BlockIO executeToDictionary(const String & database_name, const String & dictionary_name, ASTDropQuery::Kind kind, bool if_exists, bool is_temporary, bool no_ddl_lock); - DatabasePtr tryGetDatabase(const String & database_name, bool exists); - DatabaseAndTable tryGetDatabaseAndTable(const String & database_name, const String & table_name, bool if_exists); BlockIO executeToTemporaryTable(const String & table_name, ASTDropQuery::Kind kind); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index c33dc5bdb57..d3007813218 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -168,7 +168,7 @@ private: void executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final); void executeTotalsAndHaving(Pipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final); void executeHaving(Pipeline & pipeline, const ExpressionActionsPtr & expression); - void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression); + static void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression); void executeOrder(Pipeline & pipeline, InputSortingInfoPtr sorting_info); void executeWithFill(Pipeline & pipeline); void executeMergeSorted(Pipeline & pipeline); @@ -176,7 +176,7 @@ private: void executeUnion(Pipeline & pipeline, Block header); void executeLimitBy(Pipeline & pipeline); void executeLimit(Pipeline & pipeline); - void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression); + static void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression); void executeDistinct(Pipeline & pipeline, bool before_order, Names columns); void executeExtremes(Pipeline & pipeline); void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, const std::unordered_map & subqueries_for_sets); @@ -187,14 +187,14 @@ private: void executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final); void executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final); void 
executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); - void executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); + static void executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); void executeOrder(QueryPipeline & pipeline, InputSortingInfoPtr sorting_info); void executeWithFill(QueryPipeline & pipeline); void executeMergeSorted(QueryPipeline & pipeline); void executePreLimit(QueryPipeline & pipeline, bool do_not_skip_offset); void executeLimitBy(QueryPipeline & pipeline); void executeLimit(QueryPipeline & pipeline); - void executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); + static void executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); void executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns); void executeExtremes(QueryPipeline & pipeline); void executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, const std::unordered_map & subqueries_for_sets); diff --git a/dbms/src/Interpreters/InterpreterWatchQuery.cpp b/dbms/src/Interpreters/InterpreterWatchQuery.cpp index f9be3e22c55..91e7bfac2a3 100644 --- a/dbms/src/Interpreters/InterpreterWatchQuery.cpp +++ b/dbms/src/Interpreters/InterpreterWatchQuery.cpp @@ -28,10 +28,6 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } -BlockInputStreamPtr InterpreterWatchQuery::executeImpl() -{ - return std::make_shared(Block()); -} BlockIO InterpreterWatchQuery::execute() { diff --git a/dbms/src/Interpreters/InterpreterWatchQuery.h b/dbms/src/Interpreters/InterpreterWatchQuery.h index 9315ee1f889..7a5d57a1cf5 100644 --- a/dbms/src/Interpreters/InterpreterWatchQuery.h +++ b/dbms/src/Interpreters/InterpreterWatchQuery.h @@ -39,7 +39,6 @@ private: ASTPtr query_ptr; Context & context; - BlockInputStreamPtr executeImpl(); /// Table from where to read data, if not subquery. StoragePtr storage; /// Streams of read data diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.h b/dbms/src/Interpreters/LogicalExpressionsOptimizer.h index fa5289b3f5f..1a04b199a13 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.h @@ -81,7 +81,7 @@ private: bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const; /// Insert the IN expression into the OR chain. - void addInExpression(const DisjunctiveEqualityChain & chain); + static void addInExpression(const DisjunctiveEqualityChain & chain); /// Delete the equalities that were replaced by the IN expressions. 
void cleanupOrExpressions(); diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h index a49245f5609..ed6f759a7d5 100644 --- a/dbms/src/Interpreters/SelectQueryOptions.h +++ b/dbms/src/Interpreters/SelectQueryOptions.h @@ -45,9 +45,9 @@ struct SelectQueryOptions return out; } - SelectQueryOptions & analyze(bool value = true) + SelectQueryOptions & analyze(bool dry_run = true) { - only_analyze = value; + only_analyze = dry_run; return *this; } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index ce6f163ab5b..0379d928442 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -887,7 +887,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy return std::make_shared(result); } -void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings & settings) const +void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings & settings) { CustomizeFunctionsVisitor::Data data{settings.count_distinct_implementation}; CustomizeFunctionsVisitor(data).visit(query); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 671e91b663f..1578b261571 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -99,7 +99,7 @@ public: private: const Context & context; - void normalize(ASTPtr & query, Aliases & aliases, const Settings & settings) const; + static void normalize(ASTPtr & query, Aliases & aliases, const Settings & settings); }; } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index bcd032938aa..51c6c2c42f0 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -64,7 +64,7 @@ struct RestoreQualifiedNamesData { using TypeToVisit = ASTIdentifier; - void visit(ASTIdentifier & identifier, ASTPtr & ast); + static void visit(ASTIdentifier & identifier, ASTPtr & ast); }; using RestoreQualifiedNamesMatcher = OneTypeMatcher; diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp index 6c1f6b813ff..f6103556986 100644 --- a/dbms/src/Interpreters/tests/hash_map_string.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string.cpp @@ -277,7 +277,7 @@ struct Grower : public HashTableGrower<> } /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table. - [[noreturn]] void set(size_t /*num_elems*/) + [[noreturn]] static void set(size_t /*num_elems*/) { throw Poco::Exception(__PRETTY_FUNCTION__); } diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index 0239d0b34cd..ad93102e1b7 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -6,11 +6,11 @@ namespace DB { -void ASTWithAlias::writeAlias(const String & name, const FormatSettings & settings) const +static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_alias : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); settings.writeIdentifier(name); - settings.ostr << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? 
IAST::hilite_none : ""); } diff --git a/dbms/src/Parsers/ASTWithAlias.h b/dbms/src/Parsers/ASTWithAlias.h index f7a777e0a43..7a272a157e2 100644 --- a/dbms/src/Parsers/ASTWithAlias.h +++ b/dbms/src/Parsers/ASTWithAlias.h @@ -32,8 +32,6 @@ public: protected: virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0; - - void writeAlias(const String & name, const FormatSettings & settings) const; }; /// helper for setting aliases and chaining result to other functions diff --git a/dbms/src/Parsers/ParserDropQuery.cpp b/dbms/src/Parsers/ParserDropQuery.cpp index c82b4d4997c..b4ff0cb3804 100644 --- a/dbms/src/Parsers/ParserDropQuery.cpp +++ b/dbms/src/Parsers/ParserDropQuery.cpp @@ -8,49 +8,10 @@ namespace DB { -namespace ErrorCodes +namespace { -} -bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_drop("DROP"); - ParserKeyword s_detach("DETACH"); - ParserKeyword s_truncate("TRUNCATE"); - - if (s_drop.ignore(pos, expected)) - return parseDropQuery(pos, node, expected); - else if (s_detach.ignore(pos, expected)) - return parseDetachQuery(pos, node, expected); - else if (s_truncate.ignore(pos, expected)) - return parseTruncateQuery(pos, node, expected); - else - return false; -} - -bool ParserDropQuery::parseDetachQuery(Pos & pos, ASTPtr & node, Expected & expected) -{ - if (parseDropQuery(pos, node, expected)) - { - auto * drop_query = node->as(); - drop_query->kind = ASTDropQuery::Kind::Detach; - return true; - } - return false; -} - -bool ParserDropQuery::parseTruncateQuery(Pos & pos, ASTPtr & node, Expected & expected) -{ - if (parseDropQuery(pos, node, expected)) - { - auto * drop_query = node->as(); - drop_query->kind = ASTDropQuery::Kind::Truncate; - return true; - } - return false; -} - -bool ParserDropQuery::parseDropQuery(Pos & pos, ASTPtr & node, Expected & expected) +bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_table("TABLE"); @@ -129,4 +90,44 @@ bool ParserDropQuery::parseDropQuery(Pos & pos, ASTPtr & node, Expected & expect return true; } +bool parseDetachQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + if (parseDropQuery(pos, node, expected)) + { + auto * drop_query = node->as(); + drop_query->kind = ASTDropQuery::Kind::Detach; + return true; + } + return false; +} + +bool parseTruncateQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + if (parseDropQuery(pos, node, expected)) + { + auto * drop_query = node->as(); + drop_query->kind = ASTDropQuery::Kind::Truncate; + return true; + } + return false; +} + +} + +bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_drop("DROP"); + ParserKeyword s_detach("DETACH"); + ParserKeyword s_truncate("TRUNCATE"); + + if (s_drop.ignore(pos, expected)) + return parseDropQuery(pos, node, expected); + else if (s_detach.ignore(pos, expected)) + return parseDetachQuery(pos, node, expected); + else if (s_truncate.ignore(pos, expected)) + return parseTruncateQuery(pos, node, expected); + else + return false; +} + } diff --git a/dbms/src/Parsers/ParserDropQuery.h b/dbms/src/Parsers/ParserDropQuery.h index 704f93de1cf..069b9c34ddd 100644 --- a/dbms/src/Parsers/ParserDropQuery.h +++ b/dbms/src/Parsers/ParserDropQuery.h @@ -21,10 +21,6 @@ class ParserDropQuery : public IParserBase protected: const char * getName() const override{ return "DROP query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - - bool 
parseDropQuery(Pos & pos, ASTPtr & node, Expected & expected); - bool parseDetachQuery(Pos & pos, ASTPtr & node, Expected & expected); - bool parseTruncateQuery(Pos & pos, ASTPtr & node, Expected & expected); }; } diff --git a/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 83e449ee368..42f3eb3f94e 100644 --- a/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -128,7 +128,7 @@ private: return true; } - void setDataType(LiteralInfo & info) + static void setDataType(LiteralInfo & info) { /// Type (Field::Types:Which) of literal in AST can be: String, UInt64, Int64, Float64, Null or Array of simple literals (not of Arrays). /// Null and empty Array literals are considered as tokens, because template with Nullable(Nothing) or Array(Nothing) is useless. diff --git a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index ae16bc47e11..369c0740264 100644 --- a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -12,12 +12,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -DB::RowInputFormatWithDiagnosticInfo::RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_) +static String alignedName(const String & name, size_t max_length) +{ + size_t spaces_count = max_length >= name.size() ? max_length - name.size() : 0; + return name + ", " + std::string(spaces_count, ' '); +} + + +RowInputFormatWithDiagnosticInfo::RowInputFormatWithDiagnosticInfo(const Block & header_, ReadBuffer & in_, const Params & params_) : IRowInputFormat(header_, in_, params_) { } -void DB::RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo() +void RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo() { ++row_num; @@ -28,7 +35,7 @@ void DB::RowInputFormatWithDiagnosticInfo::updateDiagnosticInfo() offset_of_current_row = in.offset(); } -String DB::RowInputFormatWithDiagnosticInfo::getDiagnosticInfo() +String RowInputFormatWithDiagnosticInfo::getDiagnosticInfo() { if (in.eof()) /// Buffer has gone, cannot extract information about what has been parsed. return {}; @@ -158,12 +165,6 @@ bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(co return true; } -String RowInputFormatWithDiagnosticInfo::alignedName(const String & name, size_t max_length) const -{ - size_t spaces_count = max_length >= name.size() ? 
max_length - name.size() : 0; - return name + ", " + std::string(spaces_count, ' '); -} - void RowInputFormatWithDiagnosticInfo::resetParser() { IRowInputFormat::resetParser(); diff --git a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h index 91f52f93c78..1d502ddc281 100644 --- a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h +++ b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h @@ -22,7 +22,6 @@ protected: void updateDiagnosticInfo(); bool deserializeFieldAndPrintDiagnosticInfo(const String & col_name, const DataTypePtr & type, IColumn & column, WriteBuffer & out, size_t file_column); - String alignedName(const String & name, size_t max_length) const; virtual bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) = 0; virtual void tryDeserializeFiled(const DataTypePtr & type, IColumn & column, size_t file_column, diff --git a/dbms/src/Processors/QueryPipeline.h b/dbms/src/Processors/QueryPipeline.h index 12ac80b9feb..be90e07f281 100644 --- a/dbms/src/Processors/QueryPipeline.h +++ b/dbms/src/Processors/QueryPipeline.h @@ -195,7 +195,7 @@ private: QueryStatus * process_list_element = nullptr; void checkInitialized(); - void checkSource(const ProcessorPtr & source, bool can_have_totals); + static void checkSource(const ProcessorPtr & source, bool can_have_totals); template void addSimpleTransformImpl(const TProcessorGetter & getter); diff --git a/dbms/src/Processors/ResizeProcessor.cpp b/dbms/src/Processors/ResizeProcessor.cpp index 25184b1ac5c..fd3d6510aa8 100644 --- a/dbms/src/Processors/ResizeProcessor.cpp +++ b/dbms/src/Processors/ResizeProcessor.cpp @@ -350,7 +350,7 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in if (waiting_output.status != OutputStatus::NeedData) throw Exception("Invalid status for associated output.", ErrorCodes::LOGICAL_ERROR); - waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_deeded = */ true)); + waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true)); waiting_output.status = OutputStatus::NotActive; if (input_with_data.port->isFinished()) diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index 3344d22c749..8667e0aea27 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -382,7 +382,7 @@ private: void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants); bool tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); - bool tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out); + static bool tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out); /** Is node the key column * or expression in which column of key is wrapped by chain of functions, diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 930eac69033..f7e9cb80103 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2753,14 +2753,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na } -MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const DiskPtr & disk, const String & relative_path) -{ - MutableDataPartPtr part = createPart(Poco::Path(relative_path).getFileName(), 
disk, relative_path); - loadPartAndFixMetadata(part); - return part; -} - -void MergeTreeData::loadPartAndFixMetadata(MutableDataPartPtr part) +static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) { String full_part_path = part->getFullPath(); @@ -2786,6 +2779,13 @@ void MergeTreeData::loadPartAndFixMetadata(MutableDataPartPtr part) } } +MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const DiskPtr & disk, const String & relative_path) +{ + MutableDataPartPtr part = createPart(Poco::Path(relative_path).getFileName(), disk, relative_path); + loadPartAndFixMetadataImpl(part); + return part; +} + void MergeTreeData::calculateColumnSizesImpl() { @@ -3233,7 +3233,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { LOG_DEBUG(log, "Checking part " << part_names.second); MutableDataPartPtr part = createPart(part_names.first, name_to_disk[part_names.first], source_dir + part_names.second); - loadPartAndFixMetadata(part); + loadPartAndFixMetadataImpl(part); loaded_parts.push_back(part); } @@ -3257,25 +3257,20 @@ inline ReservationPtr checkAndReturnReservation(UInt64 expected_size, Reservatio ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) const { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - auto reservation = getStoragePolicy()->reserve(expected_size); - return checkAndReturnReservation(expected_size, std::move(reservation)); } -ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, SpacePtr space) const +ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, SpacePtr space) { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - auto reservation = tryReserveSpace(expected_size, space); - return checkAndReturnReservation(expected_size, std::move(reservation)); } -ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, SpacePtr space) const +ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, SpacePtr space) { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - return space->reserve(expected_size); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 87a42a4d624..079fb316ffd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -601,7 +601,6 @@ public: /// Check that the part is not broken and calculate the checksums for it if they are not present. MutableDataPartPtr loadPartAndFixMetadata(const DiskPtr & disk, const String & relative_path); - void loadPartAndFixMetadata(MutableDataPartPtr part); /** Create local backup (snapshot) for parts with specified prefix. * Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number, @@ -689,8 +688,8 @@ public: ReservationPtr reserveSpace(UInt64 expected_size) const; /// Reserves space at least 1MB on specific disk or volume. - ReservationPtr reserveSpace(UInt64 expected_size, SpacePtr space) const; - ReservationPtr tryReserveSpace(UInt64 expected_size, SpacePtr space) const; + static ReservationPtr reserveSpace(UInt64 expected_size, SpacePtr space); + static ReservationPtr tryReserveSpace(UInt64 expected_size, SpacePtr space); /// Reserves space at least 1MB preferring best destination according to `ttl_infos`. 
ReservationPtr reserveSpacePreferringTTLRules(UInt64 expected_size, diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 3de470abc20..ac073e76217 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1318,7 +1318,7 @@ void MergeTreeDataMergerMutator::splitMutationCommands( MergeTreeData::DataPartPtr part, const MutationCommands & commands, MutationCommands & for_interpreter, - MutationCommands & for_file_renames) const + MutationCommands & for_file_renames) { NameSet removed_columns_from_compact_part; NameSet already_changed_columns; @@ -1379,7 +1379,7 @@ void MergeTreeDataMergerMutator::splitMutationCommands( NameSet MergeTreeDataMergerMutator::collectFilesToRemove( - MergeTreeData::DataPartPtr source_part, const MutationCommands & commands_for_removes, const String & mrk_extension) const + MergeTreeData::DataPartPtr source_part, const MutationCommands & commands_for_removes, const String & mrk_extension) { /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map stream_counts; @@ -1393,7 +1393,6 @@ NameSet MergeTreeDataMergerMutator::collectFilesToRemove( {}); } - NameSet remove_files; /// Remove old indices for (const auto & command : commands_for_removes) @@ -1422,11 +1421,12 @@ NameSet MergeTreeDataMergerMutator::collectFilesToRemove( column->type->enumerateStreams(callback, stream_path); } } + return remove_files; } NameSet MergeTreeDataMergerMutator::collectFilesToSkip( - const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension) const + const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension) { NameSet files_to_skip = {"checksums.txt", "columns.txt"}; @@ -1454,7 +1454,7 @@ NameSet MergeTreeDataMergerMutator::collectFilesToSkip( NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( - MergeTreeData::DataPartPtr source_part, const Block & updated_header, NamesAndTypesList all_columns) const + MergeTreeData::DataPartPtr source_part, const Block & updated_header, NamesAndTypesList all_columns) { Names source_column_names = source_part->getColumns().getNames(); NameSet source_columns_name_set(source_column_names.begin(), source_column_names.end()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index d55298bb944..8552df8cc04 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -138,23 +138,23 @@ private: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. */ - void splitMutationCommands( + static void splitMutationCommands( MergeTreeData::DataPartPtr part, const MutationCommands & commands, MutationCommands & for_interpreter, - MutationCommands & for_file_renames) const; + MutationCommands & for_file_renames); /// Apply commands to source_part i.e. 
remove some columns in source_part /// and return set of files, that have to be removed from filesystem and checksums - NameSet collectFilesToRemove(MergeTreeData::DataPartPtr source_part, const MutationCommands & commands_for_removes, const String & mrk_extension) const; + static NameSet collectFilesToRemove(MergeTreeData::DataPartPtr source_part, const MutationCommands & commands_for_removes, const String & mrk_extension); /// Files, that we don't need to remove and don't need to hardlink, for example columns.txt and checksums.txt. /// Because we will generate new versions of them after we perform mutation. - NameSet collectFilesToSkip(const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension) const; + static NameSet collectFilesToSkip(const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension); /// Get the columns list of the resulting part in the same order as all_columns. - NamesAndTypesList getColumnsForNewDataPart(MergeTreeData::DataPartPtr source_part, const Block & updated_header, NamesAndTypesList all_columns) const; + static NamesAndTypesList getColumnsForNewDataPart(MergeTreeData::DataPartPtr source_part, const Block & updated_header, NamesAndTypesList all_columns); bool shouldExecuteTTL(const Names & columns, const MutationCommands & commands) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index e66accb2f92..6e76333da9d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -17,6 +17,21 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) +{ + Block prev_block; + for (size_t index = 0; index < granule_index_blocks.size(); ++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if (index != 0) + assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); + + prev_block = granule_index_block; + } +} + + MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_) : bits_per_row(bits_per_row_), hash_functions(hash_functions_) { @@ -96,20 +111,6 @@ void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const ostr.write(reinterpret_cast(bloom_filter->getFilter().data()), bytes_size); } -void MergeTreeIndexGranuleBloomFilter::assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const -{ - Block prev_block; - for (size_t index = 0; index < granule_index_blocks.size(); ++index) - { - Block granule_index_block = granule_index_blocks[index]; - - if (index != 0) - assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); - - prev_block = granule_index_block; - } -} - void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) { const auto & column = granule_index_block.getByPosition(index_hash_column); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h index 782f84170c7..5524c904309 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h +++ 
b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -27,8 +27,6 @@ private: size_t hash_functions; std::vector bloom_filters; - void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const; - void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column); }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 843b1e6fc98..f68184e2691 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -342,7 +342,7 @@ bool MergeTreeIndexConditionSet::atomFromAST(ASTPtr & node) const return false; } -bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) { /// Functions AND, OR, NOT. Replace with bit*. auto * func = node->as(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h index fc480e4024a..5b0448d13be 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -78,7 +78,7 @@ public: private: void traverseAST(ASTPtr & node) const; bool atomFromAST(ASTPtr & node) const; - bool operatorFromAST(ASTPtr & node) const; + static bool operatorFromAST(ASTPtr & node); bool checkASTUseless(const ASTPtr & node, bool atomic = false) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 7718ed642d3..b74fb5d1f5d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -15,6 +15,25 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } + +static void filterColumns(Columns & columns, const IColumn::Filter & filter) +{ + for (auto & column : columns) + { + if (column) + { + column = column->filter(filter, -1); + + if (column->empty()) + { + columns.clear(); + return; + } + } + } +} + + MergeTreeRangeReader::DelayedStream::DelayedStream( size_t from_mark, IMergeTreeReader * merge_tree_reader_) : current_mark(from_mark), current_offset(0), num_delayed_rows(0) @@ -682,22 +701,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar return read_result; } -void MergeTreeRangeReader::filterColumns(Columns & columns, const IColumn::Filter & filter) const -{ - for (auto & column : columns) - { - if (column) - { - column = column->filter(filter, -1); - - if (column->empty()) - { - columns.clear(); - return; - } - } - } -} MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index affdb00147a..0af19e70c2b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -213,7 +213,6 @@ private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result); - void filterColumns(Columns & columns, const IColumn::Filter & filter) const; IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 
b3660674362..8f8b5111f48 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -71,6 +71,55 @@ static void collectIdentifiersNoSubqueries(const ASTPtr & ast, NameSet & set) collectIdentifiersNoSubqueries(child, set); } +static bool isConditionGood(const ASTPtr & condition) +{ + const auto * function = condition->as(); + if (!function) + return false; + + /** we are only considering conditions of form `equals(one, another)` or `one = another`, + * especially if either `one` or `another` is ASTIdentifier */ + if (function->name != "equals") + return false; + + auto left_arg = function->arguments->children.front().get(); + auto right_arg = function->arguments->children.back().get(); + + /// try to ensure left_arg points to ASTIdentifier + if (!left_arg->as() && right_arg->as()) + std::swap(left_arg, right_arg); + + if (left_arg->as()) + { + /// condition may be "good" if only right_arg is a constant and its value is outside the threshold + if (const auto * literal = right_arg->as()) + { + const auto & field = literal->value; + const auto type = field.getType(); + + /// check the value with respect to threshold + if (type == Field::Types::UInt64) + { + const auto value = field.get(); + return value > threshold; + } + else if (type == Field::Types::Int64) + { + const auto value = field.get(); + return value < -threshold || threshold < value; + } + else if (type == Field::Types::Float64) + { + const auto value = field.get(); + return value < threshold || threshold < value; + } + } + } + + return false; +} + + void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node) const { if (const auto * func_and = node->as(); func_and && func_and->name == "and") @@ -116,7 +165,7 @@ MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const ASTPt } /// Transform Conditions list to WHERE or PREWHERE expression. 
-ASTPtr MergeTreeWhereOptimizer::reconstruct(const Conditions & conditions) const +ASTPtr MergeTreeWhereOptimizer::reconstruct(const Conditions & conditions) { if (conditions.empty()) return {}; @@ -205,55 +254,6 @@ UInt64 MergeTreeWhereOptimizer::getIdentifiersColumnSize(const NameSet & identif } -bool MergeTreeWhereOptimizer::isConditionGood(const ASTPtr & condition) const -{ - const auto * function = condition->as(); - if (!function) - return false; - - /** we are only considering conditions of form `equals(one, another)` or `one = another`, - * especially if either `one` or `another` is ASTIdentifier */ - if (function->name != "equals") - return false; - - auto left_arg = function->arguments->children.front().get(); - auto right_arg = function->arguments->children.back().get(); - - /// try to ensure left_arg points to ASTIdentifier - if (!left_arg->as() && right_arg->as()) - std::swap(left_arg, right_arg); - - if (left_arg->as()) - { - /// condition may be "good" if only right_arg is a constant and its value is outside the threshold - if (const auto * literal = right_arg->as()) - { - const auto & field = literal->value; - const auto type = field.getType(); - - /// check the value with respect to threshold - if (type == Field::Types::UInt64) - { - const auto value = field.get(); - return value > threshold; - } - else if (type == Field::Types::Int64) - { - const auto value = field.get(); - return value < -threshold || threshold < value; - } - else if (type == Field::Types::Float64) - { - const auto value = field.get(); - return value < threshold || threshold < value; - } - } - } - - return false; -} - - bool MergeTreeWhereOptimizer::hasPrimaryKeyAtoms(const ASTPtr & ast) const { if (const auto * func = ast->as()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 6ec81592ee1..f9fdc01812b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -65,7 +65,7 @@ private: Conditions analyze(const ASTPtr & expression) const; /// Transform Conditions list to WHERE or PREWHERE expression. 
- ASTPtr reconstruct(const Conditions & conditions) const; + static ASTPtr reconstruct(const Conditions & conditions); void calculateColumnSizes(const MergeTreeData & data, const Names & column_names); @@ -75,8 +75,6 @@ private: UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const; - bool isConditionGood(const ASTPtr & condition) const; - bool hasPrimaryKeyAtoms(const ASTPtr & ast) const; bool isPrimaryKeyAtom(const ASTPtr & ast) const; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index fda10029fe4..244bca37399 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -318,13 +318,8 @@ StoragePtr StorageDistributed::createWithOwnCluster( return res; } -QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context & context) const -{ - auto cluster = getCluster(); - return getQueryProcessingStage(context, cluster); -} -QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context & context, const ClusterPtr & cluster) const +static QueryProcessingStage::Enum getQueryProcessingStageImpl(const Context & context, const ClusterPtr & cluster) { const Settings & settings = context.getSettingsRef(); @@ -339,6 +334,12 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con : QueryProcessingStage::WithMergeableState; } +QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context & context) const +{ + auto cluster = getCluster(); + return getQueryProcessingStageImpl(context, cluster); +} + Pipes StorageDistributed::read( const Names & column_names, const SelectQueryInfo & query_info, diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h index 27374729db5..ee9b1e8f260 100644 --- a/dbms/src/Storages/StorageDistributed.h +++ b/dbms/src/Storages/StorageDistributed.h @@ -67,7 +67,6 @@ public: bool isRemote() const override { return true; } QueryProcessingStage::Enum getQueryProcessingStage(const Context & context) const override; - QueryProcessingStage::Enum getQueryProcessingStage(const Context & context, const ClusterPtr & cluster) const; Pipes read( const Names & column_names, diff --git a/dbms/src/Storages/StorageMySQL.cpp b/dbms/src/Storages/StorageMySQL.cpp index 1991215ad12..84b554115a6 100644 --- a/dbms/src/Storages/StorageMySQL.cpp +++ b/dbms/src/Storages/StorageMySQL.cpp @@ -175,7 +175,7 @@ public: return splitted_blocks; } - std::string dumpNamesWithBackQuote(const Block & block) const + static std::string dumpNamesWithBackQuote(const Block & block) { WriteBufferFromOwnString out; for (auto it = block.begin(); it != block.end(); ++it) @@ -187,7 +187,6 @@ public: return out.str(); } - private: const StorageMySQL & storage; std::string remote_database_name; diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index e99bbcb843a..05feeb7d786 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -91,6 +91,28 @@ ASTPtr StorageView::getRuntimeViewQuery(const ASTSelectQuery & outer_query, cons } +static void replaceTableNameWithSubquery(ASTSelectQuery * select_query, ASTPtr & subquery) +{ + auto * select_element = select_query->tables()->children[0]->as(); + + if (!select_element->table_expression) + throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + + auto * table_expression = select_element->table_expression->as(); + + if 
(!table_expression->database_and_table_name) + throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + + const auto alias = table_expression->database_and_table_name->tryGetAlias(); + table_expression->database_and_table_name = {}; + table_expression->subquery = std::make_shared(); + table_expression->subquery->children.push_back(subquery); + table_expression->children.push_back(table_expression->subquery); + if (!alias.empty()) + table_expression->subquery->setAlias(alias); +} + + ASTPtr StorageView::getRuntimeViewQuery(ASTSelectQuery * outer_query, const Context & context, bool normalize) { auto runtime_view_query = inner_query->clone(); @@ -117,28 +139,6 @@ ASTPtr StorageView::getRuntimeViewQuery(ASTSelectQuery * outer_query, const Cont return runtime_view_query; } -void StorageView::replaceTableNameWithSubquery(ASTSelectQuery * select_query, ASTPtr & subquery) -{ - auto * select_element = select_query->tables()->children[0]->as(); - - if (!select_element->table_expression) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); - - auto * table_expression = select_element->table_expression->as(); - - if (!table_expression->database_and_table_name) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); - - const auto alias = table_expression->database_and_table_name->tryGetAlias(); - table_expression->database_and_table_name = {}; - table_expression->subquery = std::make_shared(); - table_expression->subquery->children.push_back(subquery); - table_expression->children.push_back(table_expression->subquery); - if (!alias.empty()) - table_expression->subquery->setAlias(alias); -} - - void registerStorageView(StorageFactory & factory) { factory.registerStorage("View", [](const StorageFactory::Arguments & args) diff --git a/dbms/src/Storages/StorageView.h b/dbms/src/Storages/StorageView.h index 0e1e90ff612..372523f569a 100644 --- a/dbms/src/Storages/StorageView.h +++ b/dbms/src/Storages/StorageView.h @@ -36,8 +36,6 @@ public: private: ASTPtr inner_query; - void replaceTableNameWithSubquery(ASTSelectQuery * select_query, ASTPtr & subquery); - protected: StorageView( const StorageID & table_id_, diff --git a/dbms/src/Storages/System/StorageSystemGraphite.cpp b/dbms/src/Storages/System/StorageSystemGraphite.cpp index b5023cff7c3..a6163f89182 100644 --- a/dbms/src/Storages/System/StorageSystemGraphite.cpp +++ b/dbms/src/Storages/System/StorageSystemGraphite.cpp @@ -25,10 +25,10 @@ NamesAndTypesList StorageSystemGraphite::getNamesAndTypes() /* * Looking for (Replicated)*GraphiteMergeTree and get all configuration parameters for them */ -StorageSystemGraphite::Configs StorageSystemGraphite::getConfigs(const Context & context) const +static StorageSystemGraphite::Configs getConfigs(const Context & context) { const Databases databases = DatabaseCatalog::instance().getDatabases(); - Configs graphite_configs; + StorageSystemGraphite::Configs graphite_configs; for (const auto & db : databases) { @@ -51,7 +51,7 @@ StorageSystemGraphite::Configs StorageSystemGraphite::getConfigs(const Context & auto table_id = table_data->getStorageID(); if (!graphite_configs.count(config_name)) { - Config new_config = + StorageSystemGraphite::Config new_config = { table_data->merging_params.graphite_params, { table_id.database_name }, @@ -73,7 +73,7 @@ StorageSystemGraphite::Configs StorageSystemGraphite::getConfigs(const Context & void StorageSystemGraphite::fillData(MutableColumns & res_columns, const Context & 
context, const SelectQueryInfo &) const { - Configs graphite_configs = StorageSystemGraphite::getConfigs(context); + Configs graphite_configs = getConfigs(context); for (const auto & config : graphite_configs) { diff --git a/dbms/src/Storages/System/StorageSystemGraphite.h b/dbms/src/Storages/System/StorageSystemGraphite.h index 5ba7b7bda8c..29e66e416d9 100644 --- a/dbms/src/Storages/System/StorageSystemGraphite.h +++ b/dbms/src/Storages/System/StorageSystemGraphite.h @@ -33,7 +33,6 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override; - StorageSystemGraphite::Configs getConfigs(const Context & context) const; }; } diff --git a/dbms/src/Storages/System/StorageSystemNumbers.cpp b/dbms/src/Storages/System/StorageSystemNumbers.cpp index 19ae1f1e21f..20dcc58f652 100644 --- a/dbms/src/Storages/System/StorageSystemNumbers.cpp +++ b/dbms/src/Storages/System/StorageSystemNumbers.cpp @@ -106,7 +106,7 @@ private: UInt64 block_size; UInt64 max_counter; - Block createHeader() const + static Block createHeader() { return { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "number") }; } diff --git a/dbms/src/TableFunctions/TableFunctionS3.cpp b/dbms/src/TableFunctions/TableFunctionS3.cpp index 04ca48df1df..73121d342e2 100644 --- a/dbms/src/TableFunctions/TableFunctionS3.cpp +++ b/dbms/src/TableFunctions/TableFunctionS3.cpp @@ -84,13 +84,23 @@ StoragePtr TableFunctionS3::getStorage( const ColumnsDescription & columns, Context & global_context, const std::string & table_name, - const String & compression_method) const + const String & compression_method) { Poco::URI uri (source); S3::URI s3_uri (uri); UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size; - return StorageS3::create(s3_uri, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name), format, min_upload_part_size, columns, ConstraintsDescription{}, global_context, compression_method); + return StorageS3::create( + s3_uri, + access_key_id, + secret_access_key, + StorageID(getDatabaseName(), table_name), + format, + min_upload_part_size, + columns, + ConstraintsDescription{}, + global_context, + compression_method); } void registerTableFunctionS3(TableFunctionFactory & factory) diff --git a/dbms/src/TableFunctions/TableFunctionS3.h b/dbms/src/TableFunctions/TableFunctionS3.h index 76d96e56c74..a49033da1b4 100644 --- a/dbms/src/TableFunctions/TableFunctionS3.h +++ b/dbms/src/TableFunctions/TableFunctionS3.h @@ -29,7 +29,7 @@ private: const Context & context, const std::string & table_name) const override; - StoragePtr getStorage( + static StoragePtr getStorage( const String & source, const String & access_key_id, const String & secret_access_key, @@ -37,7 +37,7 @@ private: const ColumnsDescription & columns, Context & global_context, const std::string & table_name, - const String & compression_method) const; + const String & compression_method); }; } From 56c4b4984470b64777d30d112ea8ab175f1a9859 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 18 Mar 2020 04:28:57 +0300 Subject: [PATCH 049/115] performance comparison --- docker/test/performance-comparison/compare.sh | 61 ++----------------- .../test/performance-comparison/download.sh | 52 ++++++++++++++++ .../test/performance-comparison/entrypoint.sh | 13 ++-- 3 files changed, 65 insertions(+), 61 deletions(-) create mode 100755 docker/test/performance-comparison/download.sh diff --git 
a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 177ce3b9e2f..55f7b9a310d 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -2,55 +2,9 @@ set -ex set -o pipefail trap "exit" INT TERM -trap "kill 0" EXIT +trap "kill $(jobs -pr) ||:" EXIT stage=${stage:-} -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -mkdir db0 ||: - -left_pr=$1 -left_sha=$2 - -right_pr=$3 -right_sha=$4 - -datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"} - -declare -A dataset_paths -dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" -dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" -dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" -dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar" - -function download -{ - rm -r left ||: - mkdir left ||: - rm -r right ||: - mkdir right ||: - - # might have the same version on left and right - if ! [ "$left_sha" = "$right_sha" ] - then - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv & - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv & - else - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left right & - fi - - for dataset_name in $datasets - do - dataset_path="${dataset_paths[$dataset_name]}" - [ "$dataset_path" != "" ] - cd db0 && wget -nv -nd -c "$dataset_path" -O- | tar -xv & - done - - mkdir ~/fg ||: - cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl & - - wait -} function configure { @@ -295,14 +249,14 @@ create table queries engine Memory as select -- immediately, so for now we pretend they don't exist. We don't want to -- remove them altogether because we want to be able to detect regressions, -- but the right way to do this is not yet clear. - left + right < 0.02 as short, + left + right < 0.05 as short, not short and abs(diff) < 0.10 and rd[3] > 0.10 as unstable, - -- Do not consider changed the queries with 5% RD below 1% -- e.g., we're - -- likely to observe a difference > 1% in less than 5% cases. + -- Do not consider changed the queries with 5% RD below 5% -- e.g., we're + -- likely to observe a difference > 5% in less than 5% cases. -- Not sure it is correct, but empirically it filters out a lot of noise. 
- not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.01 as changed, + not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.05 as changed, * from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text'); @@ -411,7 +365,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation join queries using query group by query, metric having d > 0.5 - order by any(rd[3]) desc, d desc + order by any(rd[3]) desc, query desc, d desc ; create table stacks engine File(TSV, 'stacks.rep') as @@ -451,9 +405,6 @@ grep -H -m2 'Exception:[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||: case "$stage" in "") ;& -"download") - time download - ;& "configure") time configure ;& diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh new file mode 100755 index 00000000000..fc4622fdf39 --- /dev/null +++ b/docker/test/performance-comparison/download.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -ex +set -o pipefail +trap "exit" INT TERM +trap "kill $(jobs -pr) ||:" EXIT + +mkdir db0 ||: + +left_pr=$1 +left_sha=$2 + +right_pr=$3 +right_sha=$4 + +datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"} + +declare -A dataset_paths +dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" +dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" +dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" +dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar" + +function download +{ + rm -r left ||: + mkdir left ||: + rm -r right ||: + mkdir right ||: + + # might have the same version on left and right + if ! [ "$left_sha" = "$right_sha" ] + then + wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv & + wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv & + else + wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left right & + fi + + for dataset_name in $datasets + do + dataset_path="${dataset_paths[$dataset_name]}" + [ "$dataset_path" != "" ] + cd db0 && wget -nv -nd -c "$dataset_path" -O- | tar -xv & + done + + mkdir ~/fg ||: + cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl & + + wait +} + +download diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 330304547b7..bcff2527473 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -92,12 +92,13 @@ export CHPC_RUNS=${CHPC_RUNS:-7} # Even if we have some errors, try our best to save the logs. set +e -# compare.sh kills its process group, so put it into a separate one. -# It's probably at fault for using `kill 0` as an error handling mechanism, -# but I can't be bothered to change this now. 
-set -m -time ../compare.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log -set +m + +# Use main comparison script from the tested package, so that we can change it +# in PRs. +{ \ + time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \ + time stage=configure right/scripts/compare.sh ; \ +} 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log # Stop the servers to free memory. Normally they are restarted before getting # the profile info, so they shouldn't use much, but if the comparison script From 48a2b46499300d2ab2b8d1302e8ab45feb20fa57 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 18 Mar 2020 06:25:16 +0300 Subject: [PATCH 050/115] performance comparison --- docker/test/performance-comparison/entrypoint.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index bcff2527473..4176a1b1d7d 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -95,10 +95,14 @@ set +e # Use main comparison script from the tested package, so that we can change it # in PRs. +# Older version use 'kill 0', so put the script into a separate process group +# FIXME remove set +m in April 2020 +set +m { \ time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \ time stage=configure right/scripts/compare.sh ; \ } 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log +set -m # Stop the servers to free memory. Normally they are restarted before getting # the profile info, so they shouldn't use much, but if the comparison script From adf4ad6ce3ba68ce67397c370b1613480174d307 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 18 Mar 2020 13:27:56 +0800 Subject: [PATCH 051/115] Always prefer case insensitive suggestions --- base/common/LineReader.cpp | 17 ++++------------- base/common/LineReader.h | 3 --- dbms/programs/client/Client.cpp | 3 --- dbms/programs/client/Suggest.cpp | 13 +++++-------- dbms/programs/client/Suggest.h | 3 --- 5 files changed, 9 insertions(+), 30 deletions(-) diff --git a/base/common/LineReader.cpp b/base/common/LineReader.cpp index c69690e3420..816184fdb1f 100644 --- a/base/common/LineReader.cpp +++ b/base/common/LineReader.cpp @@ -52,19 +52,10 @@ LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String last_word = std::string_view(prefix).substr(last_word_pos + 1, std::string::npos); /// last_word can be empty. 
- - if (case_insensitive) - return std::equal_range( - words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) - { - return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0; - }); - else - return std::equal_range( - words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) - { - return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0; - }); + return std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) + { + return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0; + }); } LineReader::LineReader(const String & history_file_path_, char extender_, char delimiter_) diff --git a/base/common/LineReader.h b/base/common/LineReader.h index 66de46d5fcb..06f737a860b 100644 --- a/base/common/LineReader.h +++ b/base/common/LineReader.h @@ -18,9 +18,6 @@ public: /// Get iterators for the matched range of words if any. WordsRange getCompletions(const String & prefix, size_t prefix_length) const; - - /// case sensitive suggestion - bool case_insensitive = false; }; LineReader(const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 4885b08ad91..0618e6272d6 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -481,8 +481,6 @@ private: if (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)) { - if (config().has("case_insensitive_suggestion")) - Suggest::instance().setCaseInsensitive(); /// Load suggestion data from the server. Suggest::instance().load(connection_parameters, config().getInt("suggestion_limit")); } @@ -1720,7 +1718,6 @@ public: ("always_load_suggestion_data", "Load suggestion data even if clickhouse-client is run in non-interactive mode. Used for testing.") ("suggestion_limit", po::value()->default_value(10000), "Suggestion limit for how many databases, tables and columns to fetch.") - ("case_insensitive_suggestion", "Case sensitive suggestions.") ("multiline,m", "multiline") ("multiquery,n", "multiquery") ("format,f", po::value(), "default output format") diff --git a/dbms/programs/client/Suggest.cpp b/dbms/programs/client/Suggest.cpp index 9058bafd138..528e8d74a2b 100644 --- a/dbms/programs/client/Suggest.cpp +++ b/dbms/programs/client/Suggest.cpp @@ -50,16 +50,13 @@ void Suggest::load(const ConnectionParameters & connection_parameters, size_t su /// Note that keyword suggestions are available even if we cannot load data from server. 
- if (case_insensitive) - std::sort(words.begin(), words.end(), [](const std::string & str1, const std::string & str2) + std::sort(words.begin(), words.end(), [](const std::string & str1, const std::string & str2) + { + return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const char char1, const char char2) { - return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const char char1, const char char2) - { - return std::tolower(char1) < std::tolower(char2); - }); + return std::tolower(char1) < std::tolower(char2); }); - else - std::sort(words.begin(), words.end()); + }); ready = true; }); diff --git a/dbms/programs/client/Suggest.h b/dbms/programs/client/Suggest.h index a4d357d09b3..6c81a388ea7 100644 --- a/dbms/programs/client/Suggest.h +++ b/dbms/programs/client/Suggest.h @@ -23,9 +23,6 @@ public: return instance; } - /// Need to set before load - void setCaseInsensitive() { case_insensitive = true; } - void load(const ConnectionParameters & connection_parameters, size_t suggestion_limit); /// Older server versions cannot execute the query above. From 045f5ae348b00e555833c8efcc015abedfa0e5ac Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 18 Mar 2020 16:12:15 +0800 Subject: [PATCH 052/115] random seed by default --- dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp | 2 +- dbms/tests/queries/0_stateless/01050_group_array_sample.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index e188116cc2c..6e51dd5d447 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -88,7 +88,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(const std::string & assertUnary(name, argument_types); UInt64 max_elems = std::numeric_limits::max(); - UInt64 seed = 123456; + UInt64 seed = thread_local_rng(); UInt64 * params[2] = {&max_elems, &seed}; if (parameters.size() != 1 && parameters.size() != 2) diff --git a/dbms/tests/queries/0_stateless/01050_group_array_sample.sql b/dbms/tests/queries/0_stateless/01050_group_array_sample.sql index 395ab9d41b6..8c7c6a9648c 100644 --- a/dbms/tests/queries/0_stateless/01050_group_array_sample.sql +++ b/dbms/tests/queries/0_stateless/01050_group_array_sample.sql @@ -1,4 +1,4 @@ -select k, groupArraySample(10)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k; +select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k; -- different seed select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k; From 36aab47149c46ba8063e30fb5afa6244fcd25fea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 14:59:40 +0300 Subject: [PATCH 053/115] Fixed race condition in text_log --- base/loggers/OwnSplitChannel.cpp | 5 +++-- base/loggers/OwnSplitChannel.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/base/loggers/OwnSplitChannel.cpp b/base/loggers/OwnSplitChannel.cpp index 3b809c022b5..eda61e1105e 100644 --- a/base/loggers/OwnSplitChannel.cpp +++ b/base/loggers/OwnSplitChannel.cpp @@ -71,7 +71,8 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) /// Also log to system.text_log table, if message is not too noisy - if (text_log_max_priority && msg.getPriority() <= text_log_max_priority) + auto 
text_log_max_priority_loaded = text_log_max_priority.load(std::memory_order_relaxed); + if (text_log_max_priority_loaded && msg.getPriority() <= text_log_max_priority_loaded) { TextLogElement elem; @@ -108,7 +109,7 @@ void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_prior { std::lock_guard lock(text_log_mutex); text_log = log; - text_log_max_priority = max_priority; + text_log_max_priority.store(max_priority, std::memory_order_relaxed); } } diff --git a/base/loggers/OwnSplitChannel.h b/base/loggers/OwnSplitChannel.h index 78308e97ab7..ac313b383bb 100644 --- a/base/loggers/OwnSplitChannel.h +++ b/base/loggers/OwnSplitChannel.h @@ -33,7 +33,7 @@ private: std::mutex text_log_mutex; std::weak_ptr text_log; - int text_log_max_priority = -1; + std::atomic text_log_max_priority = -1; }; } From cf1a8bc36ebfeaee87dd227298a0b81634332e0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 15:14:10 +0300 Subject: [PATCH 054/115] Fixed error --- dbms/src/Common/UTF8Helpers.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Common/UTF8Helpers.cpp b/dbms/src/Common/UTF8Helpers.cpp index ff799315d3b..d393ee2328b 100644 --- a/dbms/src/Common/UTF8Helpers.cpp +++ b/dbms/src/Common/UTF8Helpers.cpp @@ -77,6 +77,7 @@ static int wcwidth(wchar_t wc) case widechar_nonprint: case widechar_combining: case widechar_unassigned: + return 0; case widechar_ambiguous: case widechar_private_use: case widechar_widened_in_9: From 5abe3ac3f1e6eacfaaaf19eb825d1accd2fbb74d Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 18 Mar 2020 16:02:32 +0300 Subject: [PATCH 055/115] Switch docs to python3 and update MkDocs to 1.1 (#9711) + some grammar and css fixes --- .gitignore | 1 + docs/en/data_types/array.md | 2 +- docs/en/data_types/boolean.md | 2 +- docs/en/data_types/date.md | 2 +- docs/en/data_types/datetime.md | 2 +- docs/en/data_types/datetime64.md | 6 +- docs/en/data_types/decimal.md | 16 +- docs/en/data_types/enum.md | 4 +- docs/en/data_types/fixedstring.md | 4 +- docs/en/data_types/float.md | 4 +- docs/en/data_types/index.md | 6 +- .../aggregatefunction.md | 2 +- docs/en/data_types/nullable.md | 4 +- .../data_types/special_data_types/interval.md | 4 +- docs/en/data_types/uuid.md | 2 +- docs/en/faq/general.md | 4 +- docs/en/getting_started/install.md | 4 +- docs/en/guides/apply_catboost_model.md | 12 +- docs/en/interfaces/cli.md | 8 +- docs/en/interfaces/formats.md | 70 ++++---- docs/en/interfaces/http.md | 4 +- docs/en/interfaces/index.md | 2 +- docs/en/interfaces/mysql.md | 2 +- docs/en/interfaces/third-party/gui.md | 2 +- docs/en/operations/configuration_files.md | 2 +- docs/en/operations/quotas.md | 2 +- docs/en/operations/server_settings/index.md | 2 +- .../en/operations/server_settings/settings.md | 64 ++++---- docs/en/operations/settings/index.md | 2 +- .../settings/permissions_for_queries.md | 6 +- .../operations/settings/query_complexity.md | 14 +- docs/en/operations/settings/settings.md | 154 +++++++++--------- docs/en/operations/system_tables.md | 34 ++-- .../table_engines/collapsingmergetree.md | 6 +- docs/en/operations/table_engines/file.md | 2 +- docs/en/operations/table_engines/generate.md | 2 +- .../table_engines/graphitemergetree.md | 12 +- docs/en/operations/table_engines/hdfs.md | 2 +- docs/en/operations/table_engines/index.md | 4 +- docs/en/operations/table_engines/jdbc.md | 2 +- docs/en/operations/table_engines/join.md | 4 +- docs/en/operations/table_engines/kafka.md | 2 +- docs/en/operations/table_engines/mergetree.md | 22 +-- 
docs/en/operations/table_engines/odbc.md | 2 +- .../operations/table_engines/replication.md | 4 +- docs/en/operations/table_engines/stripelog.md | 8 +- .../table_engines/summingmergetree.md | 2 +- docs/en/operations/table_engines/url.md | 2 +- .../versionedcollapsingmergetree.md | 4 +- docs/en/operations/table_engines/view.md | 2 +- docs/en/operations/tips.md | 22 +-- docs/en/operations/troubleshooting.md | 8 +- .../operations/utils/clickhouse-benchmark.md | 8 +- .../agg_functions/combinators.md | 20 +-- docs/en/query_language/agg_functions/index.md | 2 +- .../agg_functions/parametric_functions.md | 10 +- .../query_language/agg_functions/reference.md | 72 ++++---- docs/en/query_language/alter.md | 42 ++--- docs/en/query_language/create.md | 16 +- .../en/query_language/dicts/external_dicts.md | 4 +- .../dicts/external_dicts_dict.md | 2 +- .../dicts/external_dicts_dict_layout.md | 6 +- .../dicts/external_dicts_dict_sources.md | 18 +- .../dicts/external_dicts_dict_structure.md | 6 +- .../en/query_language/dicts/internal_dicts.md | 2 +- .../functions/arithmetic_functions.md | 2 +- .../functions/array_functions.md | 34 ++-- .../en/query_language/functions/array_join.md | 2 +- .../query_language/functions/bit_functions.md | 6 +- .../functions/bitmap_functions.md | 8 +- .../functions/comparison_functions.md | 12 +- .../functions/conditional_functions.md | 4 +- .../functions/date_time_functions.md | 6 +- .../functions/encoding_functions.md | 4 +- .../functions/ext_dict_functions.md | 6 +- docs/en/query_language/functions/geo.md | 2 +- .../functions/hash_functions.md | 16 +- .../functions/higher_order_functions.md | 20 +-- .../query_language/functions/in_functions.md | 2 +- .../query_language/functions/introspection.md | 6 +- .../functions/machine_learning_functions.md | 2 +- .../functions/other_functions.md | 36 ++-- .../functions/rounding_functions.md | 4 +- .../functions/string_functions.md | 22 +-- .../functions/string_search_functions.md | 20 +-- .../functions/type_conversion_functions.md | 10 +- .../functions/uuid_functions.md | 2 +- docs/en/query_language/insert_into.md | 2 +- docs/en/query_language/misc.md | 10 +- docs/en/query_language/operators.md | 10 +- docs/en/query_language/select.md | 40 ++--- docs/en/query_language/show.md | 2 +- docs/en/query_language/syntax.md | 12 +- docs/en/query_language/system.md | 30 ++-- .../en/query_language/table_functions/jdbc.md | 2 +- .../en/query_language/table_functions/odbc.md | 2 +- docs/en/roadmap.md | 3 + docs/en/security_changelog.md | 6 +- docs/ru/operations/tips.md | 22 +-- .../functions/string_functions.md | 2 +- docs/tools/build.py | 15 +- docs/tools/mdx_clickhouse.py | 26 ++- docs/tools/release.sh | 2 +- docs/tools/requirements.txt | 31 ++-- docs/tools/website.py | 2 +- docs/zh/operations/tips.md | 22 +-- .../functions/string_functions.md | 2 +- website/css/base.css | 15 +- website/images/clickhouse-black.svg | 2 +- website/images/index/hardware-efficient.jpg | Bin 111195 -> 0 bytes website/images/index/hardware-efficient.svg | 2 +- website/index.html | 2 - website/js/base.js | 2 + website/templates/footer.html | 4 +- website/templates/index/community.html | 22 ++- website/templates/index/efficient.html | 11 +- website/templates/index/features.html | 10 +- website/templates/index/hero.html | 16 +- website/templates/index/nav.html | 4 +- website/templates/index/performance.html | 16 +- website/templates/index/reliable.html | 6 +- website/templates/index/rich.html | 10 +- website/templates/index/success.html | 12 +- 
website/templates/index/use.html | 4 +- website/templates/index/why.html | 9 +- 125 files changed, 700 insertions(+), 685 deletions(-) delete mode 100644 website/images/index/hardware-efficient.jpg diff --git a/.gitignore b/.gitignore index 1e6bb1716ec..5f41164e3f7 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ /docs/publish /docs/edit /docs/website +/docs/venv/ /docs/tools/venv/ /docs/en/single.md /docs/ru/single.md diff --git a/docs/en/data_types/array.md b/docs/en/data_types/array.md index d23fe60c327..4c9eef2cdfe 100644 --- a/docs/en/data_types/array.md +++ b/docs/en/data_types/array.md @@ -1,4 +1,4 @@ -# Array(T) {#data_type-array} +# Array(T) { #data_type-array} Array of `T`-type items. diff --git a/docs/en/data_types/boolean.md b/docs/en/data_types/boolean.md index d5fc88e45cc..46bd9ec6f45 100644 --- a/docs/en/data_types/boolean.md +++ b/docs/en/data_types/boolean.md @@ -1,6 +1,6 @@ # Boolean Values -There isn't a separate type for boolean values. They use the UInt8 type, restricted to the values 0 or 1. +There is no separate type for boolean values. Use UInt8 type, restricted to the values 0 or 1. [Original article](https://clickhouse.tech/docs/en/data_types/boolean/) diff --git a/docs/en/data_types/date.md b/docs/en/data_types/date.md index cb5f84d6545..2882e24b3c4 100644 --- a/docs/en/data_types/date.md +++ b/docs/en/data_types/date.md @@ -3,7 +3,7 @@ A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2106, but the final fully-supported year is 2105). The minimum value is output as 0000-00-00. -The date is stored without the time zone. +The date value is stored without the time zone. [Original article](https://clickhouse.tech/docs/en/data_types/date/) diff --git a/docs/en/data_types/datetime.md b/docs/en/data_types/datetime.md index fded3caa4e5..947b481d166 100644 --- a/docs/en/data_types/datetime.md +++ b/docs/en/data_types/datetime.md @@ -1,4 +1,4 @@ -# DateTime {#data_type-datetime} +# DateTime { #data_type-datetime} Allows to store an instant in time, that can be expressed as a calendar date and a time of a day. diff --git a/docs/en/data_types/datetime64.md b/docs/en/data_types/datetime64.md index f7feabbb996..f060ba9d83c 100644 --- a/docs/en/data_types/datetime64.md +++ b/docs/en/data_types/datetime64.md @@ -1,4 +1,4 @@ -# DateTime64 {#data_type-datetime64} +# DateTime64 { #data_type-datetime64} Allows to store an instant in time, that can be expressed as a calendar date and a time of a day, with defined sub-second precision @@ -9,7 +9,7 @@ Syntax: DateTime64(precision, [timezone]) ``` -Internally, stores data as number of 'ticks' since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed ('2020-01-01 05:00:01.000'). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](datetime.md). +Internally, stores data as a number of 'ticks' since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. 
Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed ('2020-01-01 05:00:01.000'). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](datetime.md). ## Examples @@ -36,7 +36,7 @@ SELECT * FROM dt └─────────────────────────┴──────────┘ ``` -* When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Europe/Moscow` (UTC+3) timezone specified, when outputting as string the value will be shown as `'2019-01-01 03:00:00'` +* When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Europe/Moscow` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'` * When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Europe/Moscow` timezone and stored as `1546290000000`. **2.** Filtering on `DateTime64` values diff --git a/docs/en/data_types/decimal.md b/docs/en/data_types/decimal.md index 8d3112c3dab..defbc5601d3 100644 --- a/docs/en/data_types/decimal.md +++ b/docs/en/data_types/decimal.md @@ -1,7 +1,7 @@ # Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S) -Signed fixed point numbers that keep precision during add, subtract and multiply operations. For division least significant digits are discarded (not rounded). +Signed fixed-point numbers that keep precision during add, subtract and multiply operations. For division least significant digits are discarded (not rounded). ## Parameters @@ -23,9 +23,9 @@ For example, Decimal32(4) can contain numbers from -99999.9999 to 99999.9999 wit ## Internal representation -Internally data is represented as normal signed integers with respective bit width. Real value ranges that can be stored in memory are a bit larger than specified above, which are checked only on convertion from string. +Internally data is represented as normal signed integers with respective bit width. Real value ranges that can be stored in memory are a bit larger than specified above, which are checked only on conversion from a string. -Because modern CPU's do not support 128 bit integers natively, operations on Decimal128 are emulated. Because of this Decimal128 works signigicantly slower than Decimal32/Decimal64. +Because modern CPU's do not support 128-bit integers natively, operations on Decimal128 are emulated. Because of this Decimal128 works significantly slower than Decimal32/Decimal64. ## Operations and result type @@ -41,15 +41,15 @@ Rules for scale: - multuply: S = S1 + S2. - divide: S = S1. -For similar operations between Decimal and integers, the result is Decimal of the same size as argument. +For similar operations between Decimal and integers, the result is Decimal of the same size as an argument. -Operations between Decimal and Float32/Float64 are not defined. If you really need them, you can explicitly cast one of argument using toDecimal32, toDecimal64, toDecimal128 or toFloat32, toFloat64 builtins. 
Keep in mind that the result will lose precision and type conversion is computationally expensive operation. +Operations between Decimal and Float32/Float64 are not defined. If you need them, you can explicitly cast one of argument using toDecimal32, toDecimal64, toDecimal128 or toFloat32, toFloat64 builtins. Keep in mind that the result will lose precision and type conversion is a computationally expensive operation. -Some functions on Decimal return result as Float64 (for example, var or stddev). Intermediate calculations might still be performed in Decimal, which might lead to different results between Float64 and Decimal inputs with same values. +Some functions on Decimal return result as Float64 (for example, var or stddev). Intermediate calculations might still be performed in Decimal, which might lead to different results between Float64 and Decimal inputs with the same values. ## Overflow checks -During calculations on Decimal, integer overflows might happen. Excessive digits in fraction are discarded (not rounded). Excessive digits in integer part will lead to exception. +During calculations on Decimal, integer overflows might happen. Excessive digits in a fraction are discarded (not rounded). Excessive digits in integer part will lead to an exception. ```sql SELECT toDecimal32(2, 4) AS x, x / 3 @@ -86,7 +86,7 @@ SELECT toDecimal32(4.2, 8) AS x, 6 * x └────────────┴──────────────────────────────────┘ ``` -Overflow checks happen not only on arithmetic operations, but also on value comparison: +Overflow checks happen not only on arithmetic operations but also on value comparison: ```sql SELECT toDecimal32(1, 8) < 100 diff --git a/docs/en/data_types/enum.md b/docs/en/data_types/enum.md index 6fee1602e38..368838ead22 100644 --- a/docs/en/data_types/enum.md +++ b/docs/en/data_types/enum.md @@ -2,7 +2,7 @@ Enumerated type consisting of named values. -Named values must be delcared as `'string' = integer` pairs. ClickHouse stores only numbers, but supports operations with the values through their names. +Named values must be declared as `'string' = integer` pairs. ClickHouse stores only numbers, but supports operations with the values through their names. ClickHouse supports: @@ -106,7 +106,7 @@ The implicit default value is the value with the lowest number. During `ORDER BY`, `GROUP BY`, `IN`, `DISTINCT` and so on, Enums behave the same way as the corresponding numbers. For example, ORDER BY sorts them numerically. Equality and comparison operators work the same way on Enums as they do on the underlying numeric values. -Enum values cannot be compared with numbers. Enums can be compared to a constant string. If the string compared to is not a valid value for the Enum, an exception will be thrown. The IN operator is supported with the Enum on the left hand side and a set of strings on the right hand side. The strings are the values of the corresponding Enum. +Enum values cannot be compared with numbers. Enums can be compared to a constant string. If the string compared to is not a valid value for the Enum, an exception will be thrown. The IN operator is supported with the Enum on the left-hand side and a set of strings on the right-hand side. The strings are the values of the corresponding Enum. Most numeric and string operations are not defined for Enum values, e.g. adding a number to an Enum or concatenating a string to an Enum. However, the Enum has a natural `toString` function that returns its string value. 
diff --git a/docs/en/data_types/fixedstring.md b/docs/en/data_types/fixedstring.md index 9c40295bbb8..088315a4d6f 100644 --- a/docs/en/data_types/fixedstring.md +++ b/docs/en/data_types/fixedstring.md @@ -14,7 +14,7 @@ The `FixedString` type is efficient when data has the length of precisely `N` by Examples of the values that can be efficiently stored in `FixedString`-typed columns: -- Binary representation of IP addresses (`FixedString(16)` for IPv6). +- The binary representation of IP addresses (`FixedString(16)` for IPv6). - Language codes (ru_RU, en_US ... ). - Currency codes (USD, RUB ... ). - Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). @@ -48,7 +48,7 @@ WHERE a = 'b\0' └───┘ ``` -This behavior differs from MySQL behavior for the `CHAR` type (where strings are padded with spaces, and the spaces are removed for output). +This behaviour differs from MySQL for the `CHAR` type (where strings are padded with spaces, and the spaces are removed for output). Note that the length of the `FixedString(N)` value is constant. The [length](../query_language/functions/array_functions.md#array_functions-length) function returns `N` even if the `FixedString(N)` value is filled only with null bytes, but the [empty](../query_language/functions/string_functions.md#string_functions-empty) function returns `1` in this case. diff --git a/docs/en/data_types/float.md b/docs/en/data_types/float.md index 1531c8d5722..c184bf6bfe8 100644 --- a/docs/en/data_types/float.md +++ b/docs/en/data_types/float.md @@ -25,9 +25,9 @@ SELECT 1 - 0.9 - The result of the calculation depends on the calculation method (the processor type and architecture of the computer system). - Floating-point calculations might result in numbers such as infinity (`Inf`) and "not-a-number" (`NaN`). This should be taken into account when processing the results of calculations. -- When parsing floating point numbers from text, the result might not be the nearest machine-representable number. +- When parsing floating-point numbers from text, the result might not be the nearest machine-representable number. -## NaN and Inf {#data_type-float-nan-inf} +## NaN and Inf { #data_type-float-nan-inf} In contrast to standard SQL, ClickHouse supports the following categories of floating-point numbers: diff --git a/docs/en/data_types/index.md b/docs/en/data_types/index.md index 17a30842748..4f0a57959ab 100644 --- a/docs/en/data_types/index.md +++ b/docs/en/data_types/index.md @@ -1,8 +1,8 @@ -# Data Types {#data_types} +# Data Types { #data_types} -ClickHouse can store various types of data in table cells. +ClickHouse can store various kinds of data in table cells. -This section describes the supported data types and special considerations when using and/or implementing them, if any. +This section describes the supported data types and special considerations for using and/or implementing them if any. [Original article](https://clickhouse.tech/docs/en/data_types/) diff --git a/docs/en/data_types/nested_data_structures/aggregatefunction.md b/docs/en/data_types/nested_data_structures/aggregatefunction.md index 67520f75670..f6f86ed37ef 100644 --- a/docs/en/data_types/nested_data_structures/aggregatefunction.md +++ b/docs/en/data_types/nested_data_structures/aggregatefunction.md @@ -1,4 +1,4 @@ -# AggregateFunction(name, types_of_arguments...) {#data_type-aggregatefunction} +# AggregateFunction(name, types_of_arguments...) { #data_type-aggregatefunction} The intermediate state of an aggregate function. 
To get it, use aggregate functions with the `-State` suffix. To get aggregated data in the future, you must use the same aggregate functions with the `-Merge`suffix. diff --git a/docs/en/data_types/nullable.md b/docs/en/data_types/nullable.md index 3974091b4ce..a94967e92eb 100644 --- a/docs/en/data_types/nullable.md +++ b/docs/en/data_types/nullable.md @@ -1,4 +1,4 @@ -# Nullable(TypeName) {#data_type-nullable} +# Nullable(TypeName) { #data_type-nullable} Allows to store special marker ([NULL](../query_language/syntax.md)) that denotes "missing value" alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that don't have a value will store `NULL`. @@ -10,7 +10,7 @@ A `Nullable` type field can't be included in table indexes. ## Storage features -To store `Nullable` type values in table column, ClickHouse uses a separate file with `NULL` masks in addition to normal file with values. Entries in masks file allow ClickHouse to distinguish between `NULL` and default value of corresponding data type for each table row. Because of additional file, `Nullable` column consumes additional storage space compared to similar normal one. +To store `Nullable` type values in a table column, ClickHouse uses a separate file with `NULL` masks in addition to normal file with values. Entries in masks file allow ClickHouse to distinguish between `NULL` and a default value of corresponding data type for each table row. Because of an additional file, `Nullable` column consumes additional storage space compared to a similar normal one. !!! info "Note" Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases. diff --git a/docs/en/data_types/special_data_types/interval.md b/docs/en/data_types/special_data_types/interval.md index b0d152e69bd..8a8f507ccc3 100644 --- a/docs/en/data_types/special_data_types/interval.md +++ b/docs/en/data_types/special_data_types/interval.md @@ -1,4 +1,4 @@ -# Interval {#data-type-interval} +# Interval { #data-type-interval} The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../query_language/operators.md#operator-interval) operator. @@ -32,7 +32,7 @@ SELECT toTypeName(INTERVAL 4 DAY) └──────────────────────────────┘ ``` -## Usage Remarks {#data-type-interval-usage-remarks} +## Usage Remarks { #data-type-interval-usage-remarks} You can use `Interval`-type values in arithmetical operations with [Date](../../data_types/date.md) and [DateTime](../../data_types/datetime.md)-type values. For example, you can add 4 days to the current time: diff --git a/docs/en/data_types/uuid.md b/docs/en/data_types/uuid.md index c5ace976ef9..8aea8b51ace 100644 --- a/docs/en/data_types/uuid.md +++ b/docs/en/data_types/uuid.md @@ -1,4 +1,4 @@ -# UUID {#uuid-data-type} +# UUID { #uuid-data-type} A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). diff --git a/docs/en/faq/general.md b/docs/en/faq/general.md index fb753026812..01735c35041 100644 --- a/docs/en/faq/general.md +++ b/docs/en/faq/general.md @@ -11,7 +11,7 @@ Distributed sorting is one of the main causes of reduced performance when runnin Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP in order to run experiments quickly. 
For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. -## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings} +## What If I Have a Problem with Encodings When Using Oracle Through ODBC? { #oracle-odbc-encodings} If you use Oracle through the ODBC driver as a source of external dictionaries, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). @@ -21,7 +21,7 @@ If you use Oracle through the ODBC driver as a source of external dictionaries, NLS_LANG=RUSSIAN_RUSSIA.UTF8 ``` -## How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file} +## How Do I Export Data from ClickHouse to a File? { #how-to-export-to-file} ### Using INTO OUTFILE Clause diff --git a/docs/en/getting_started/install.md b/docs/en/getting_started/install.md index 9bcff1cbeab..e62528e14c2 100644 --- a/docs/en/getting_started/install.md +++ b/docs/en/getting_started/install.md @@ -14,7 +14,7 @@ To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or P ## Available Installation Options -### From DEB Packages {#install-from-deb-packages} +### From DEB Packages { #install-from-deb-packages} It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu. @@ -66,7 +66,7 @@ sudo yum install clickhouse-server clickhouse-client You can also download and install packages manually from here: . -### From tgz archives {#from-tgz-archives} +### From tgz archives { #from-tgz-archives} It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. diff --git a/docs/en/guides/apply_catboost_model.md b/docs/en/guides/apply_catboost_model.md index a9d8707f5ca..9ab314e0398 100644 --- a/docs/en/guides/apply_catboost_model.md +++ b/docs/en/guides/apply_catboost_model.md @@ -1,4 +1,4 @@ -# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} +# Applying a Catboost Model in ClickHouse { #applying-catboost-model-in-clickhouse} [CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning. @@ -13,7 +13,7 @@ To apply a CatBoost model in ClickHouse: For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training). -## Prerequisites {#prerequisites} +## Prerequisites { #prerequisites} If you don't have the [Docker](https://docs.docker.com/install/) yet, install it. @@ -44,7 +44,7 @@ yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 $ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse ``` -## 1. Create a Table {#create-table} +## 1. Create a Table { #create-table} To create a ClickHouse table for the train sample: @@ -83,7 +83,7 @@ ENGINE = MergeTree ORDER BY date :) exit ``` -## 2. Insert the Data to the Table {#insert-data-to-table} +## 2. Insert the Data to the Table { #insert-data-to-table} To insert the data: @@ -112,7 +112,7 @@ FROM amazon_train +---------+ ``` -## 3. 
Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse} +## 3. Integrate CatBoost into ClickHouse { #integrate-catboost-into-clickhouse} !!! note "Note" **Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse. @@ -154,7 +154,7 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel./home/catboost/models/*_model.xml ``` -## 4. Run the Model Inference from SQL {#run-model-inference} +## 4. Run the Model Inference from SQL { #run-model-inference} For test model run the ClickHouse client `$ clickhouse client`. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 48965d11062..7477e81cd76 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -19,7 +19,7 @@ Different client and server versions are compatible with one another, but some f ClickHouse client version is older than ClickHouse server. It may lack support for new features. ``` -## Usage {#cli_usage} +## Usage { #cli_usage} The client can be used in interactive and non-interactive (batch) mode. To use batch mode, specify the 'query' parameter, or send data to 'stdin' (it verifies that 'stdin' is not a terminal), or both. @@ -71,7 +71,7 @@ You can cancel a long query by pressing Ctrl+C. However, you will still need to The command-line client allows passing external data (external temporary tables) for querying. For more information, see the section "External data for query processing". -### Queries with Parameters {#cli-queries-with-parameters} +### Queries with Parameters { #cli-queries-with-parameters} You can create a query with parameters and pass values to them from client application. This allows to avoid formatting query with specific dynamic values on client side. For example: @@ -79,7 +79,7 @@ You can create a query with parameters and pass values to them from client appli $ clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {parName:Array(UInt16)}" ``` -#### Query Syntax {#cli-queries-with-parameters-syntax} +#### Query Syntax { #cli-queries-with-parameters-syntax} Format a query as usual, then place the values that you want to pass from the app parameters to the query in braces in the following format: @@ -96,7 +96,7 @@ Format a query as usual, then place the values that you want to pass from the ap $ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM table WHERE val = {tuple_in_tuple:Tuple(UInt8, Tuple(String, UInt8))}" ``` -## Configuring {#interfaces_cli_configuration} +## Configuring { #interfaces_cli_configuration} You can pass parameters to `clickhouse-client` (all parameters have a default value) using: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a6deb4ccb02..b6e768513d7 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1,4 +1,4 @@ -# Formats for Input and Output Data {#formats} +# Formats for Input and Output Data { #formats} ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read an external dictionary. A format supported for output can be used to arrange the results of a `SELECT`, and to perform `INSERT`s into a file-backed table. @@ -42,7 +42,7 @@ The supported formats are: You can control some format processing parameters with the ClickHouse settings. 
For more information read the [Settings](../operations/settings/settings.md) section. -## TabSeparated {#tabseparated} +## TabSeparated { #tabseparated} In TabSeparated format, data is written by row. Each row contains values separated by tabs. Each value is follow by a tab, except the last value in the row, which is followed by a line feed. Strictly Unix line feeds are assumed everywhere. The last row also must contain a line feed at the end. Values are written in text format, without enclosing quotation marks, and with special characters escaped. @@ -130,14 +130,14 @@ SELECT * FROM nestedt FORMAT TSV 1 [1] ['a'] ``` -## TabSeparatedRaw {#tabseparatedraw} +## TabSeparatedRaw { #tabseparatedraw} Differs from `TabSeparated` format in that the rows are written without escaping. This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). This format is also available under the name `TSVRaw`. -## TabSeparatedWithNames {#tabseparatedwithnames} +## TabSeparatedWithNames { #tabseparatedwithnames} Differs from the `TabSeparated` format in that the column names are written in the first row. During parsing, the first row is completely ignored. You can't use column names to determine their position or to check their correctness. @@ -145,14 +145,14 @@ During parsing, the first row is completely ignored. You can't use column names This format is also available under the name `TSVWithNames`. -## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes} +## TabSeparatedWithNamesAndTypes { #tabseparatedwithnamesandtypes} Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. During parsing, the first and second rows are completely ignored. This format is also available under the name `TSVWithNamesAndTypes`. -## Template {#format-template} +## Template { #format-template} This format allows to specify a custom format string with placeholders for values with specified escaping rule. @@ -268,7 +268,7 @@ Page views: ${PageViews:CSV}, User id: ${UserID:CSV}, Useless field: ${:CSV}, Du `PageViews`, `UserID`, `Duration` and `Sign` inside placeholders are names of columns in the table. Values after `Useless field` in rows and after `\nTotal rows: ` in suffix will be ignored. All delimiters in the input data must be strictly equal to delimiters in specified format strings. -## TemplateIgnoreSpaces {#templateignorespaces} +## TemplateIgnoreSpaces { #templateignorespaces} This format is suitable only for input. Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows to specify empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters. @@ -286,7 +286,7 @@ format_template_resultset = '/some/path/resultset.format', format_template_row = {${}"SearchPhrase"${}:${}${phrase:JSON}${},${}"c"${}:${}${cnt:JSON}${}} ``` -## TSKV {#tskv} +## TSKV { #tskv} Similar to TabSeparated, but outputs a value in name=value format. Names are escaped the same way as in TabSeparated format, and the = symbol is also escaped. @@ -319,7 +319,7 @@ Both data output and parsing are supported in this format. 
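As a small illustration of the `name=value` layout (the column aliases are hypothetical):

```sql
SELECT 1 AS id, 'bathroom interior design' AS SearchPhrase
FORMAT TSKV
```

This would print a single row of the form `id=1<TAB>SearchPhrase=bathroom interior design`, with names and the `=` sign escaped the same way as values in TabSeparated.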
For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -## CSV {#csv} +## CSV { #csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). @@ -345,12 +345,12 @@ The CSV format supports the output of totals and extremes the same way as `TabSe Also prints the header row, similar to `TabSeparatedWithNames`. -## CustomSeparated {#format-customseparated} +## CustomSeparated { #format-customseparated} Similar to [Template](#format-template), but it prints or reads all columns and uses escaping rule from setting `format_custom_escaping_rule` and delimiters from settings `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` and `format_custom_result_after_delimiter`, not from format strings. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to `TemplateIgnoreSpaces`. -## JSON {#json} +## JSON { #json} Outputs data in JSON format. Besides data tables, it also outputs column names and types, along with some additional information: the total number of output rows, and the number of rows that could have been output if there weren't a LIMIT. Example: @@ -439,7 +439,7 @@ ClickHouse supports [NULL](../query_language/syntax.md), which is displayed as ` See also the [JSONEachRow](#jsoneachrow) format. -## JSONCompact {#jsoncompact} +## JSONCompact { #jsoncompact} Differs from JSON only in that data rows are output in arrays, not in objects. @@ -485,7 +485,7 @@ Example: This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). See also the `JSONEachRow` format. -## JSONEachRow {#jsoneachrow} +## JSONEachRow { #jsoneachrow} When using this format, ClickHouse outputs rows as separated, newline-delimited JSON objects, but the data as a whole is not valid JSON. @@ -555,7 +555,7 @@ Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 seque !!! note "Note" Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. -### Usage of Nested Structures {#jsoneachrow-nested} +### Usage of Nested Structures { #jsoneachrow-nested} If you have a table with [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting. @@ -609,18 +609,18 @@ SELECT * FROM json_each_row_nested └───────────────┴────────┘ ``` -## Native {#native} +## Native { #native} The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is "columnar" – it doesn't convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients. You can use this format to quickly generate dumps that can only be read by the ClickHouse DBMS. It doesn't make sense to work with this format yourself. -## Null {#null} +## Null { #null} Nothing is output. 
However, the query is processed, and when using the command-line client, data is transmitted to the client. This is used for tests, including productivity testing. Obviously, this format is only appropriate for output, not for parsing. -## Pretty {#pretty} +## Pretty { #pretty} Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting colors in the terminal. A full grid of the table is drawn, and each row occupies two lines in the terminal. @@ -684,16 +684,16 @@ Extremes: └────────────┴─────────┘ ``` -## PrettyCompact {#prettycompact} +## PrettyCompact { #prettycompact} Differs from [Pretty](#pretty) in that the grid is drawn between rows and the result is more compact. This format is used by default in the command-line client in interactive mode. -## PrettyCompactMonoBlock {#prettycompactmonoblock} +## PrettyCompactMonoBlock { #prettycompactmonoblock} Differs from [PrettyCompact](#prettycompact) in that up to 10,000 rows are buffered, then output as a single table, not by blocks. -## PrettyNoEscapes {#prettynoescapes} +## PrettyNoEscapes { #prettynoescapes} Differs from Pretty in that ANSI-escape sequences aren't used. This is necessary for displaying this format in a browser, as well as for using the 'watch' command-line utility. @@ -713,11 +713,11 @@ The same as the previous setting. The same as the previous setting. -## PrettySpace {#prettyspace} +## PrettySpace { #prettyspace} Differs from [PrettyCompact](#prettycompact) in that whitespace (space characters) is used instead of the grid. -## RowBinary {#rowbinary} +## RowBinary { #rowbinary} Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. This format is less efficient than the Native format, since it is row-based. @@ -732,7 +732,7 @@ Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia. For [NULL](../query_language/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../data_types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. -## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes} +## RowBinaryWithNamesAndTypes { #rowbinarywithnamesandtypes} Similar to [RowBinary](#rowbinary), but with added header: @@ -740,7 +740,7 @@ Similar to [RowBinary](#rowbinary), but with added header: * N `String`s specifying column names * N `String`s specifying column types -## Values {#data-format-values} +## Values { #data-format-values} Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`. 
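A minimal sketch of this layout, assuming a hypothetical table with the columns shown in the comment:

```sql
-- Hypothetical table: t(id UInt32, s Nullable(String), arr Array(UInt8))
INSERT INTO t (id, s, arr) VALUES (1, 'hello', [1, 2]), (2, 'it''s', []), (3, NULL, [3])
```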
@@ -750,7 +750,7 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also See also: [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) and [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#settings-input_format_values_deduce_templates_of_expressions) settings. -## Vertical {#vertical} +## Vertical { #vertical} Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows, if each row consists of a large number of columns. @@ -783,11 +783,11 @@ test: string with 'quotes' and with some special This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). -## VerticalRaw {#verticalraw} +## VerticalRaw { #verticalraw} Similar to [Vertical](#vertical), but with escaping disabled. This format is only suitable for outputting query results, not for parsing (receiving data and inserting it in the table). -## XML {#xml} +## XML { #xml} XML format is suitable only for output, not for parsing. Example: @@ -860,7 +860,7 @@ In string values, the characters `<` and `&` are escaped as `<` and `&`. Arrays are output as `HelloWorld...`,and tuples as `HelloWorld...`. -## CapnProto {#capnproto} +## CapnProto { #capnproto} Cap'n Proto is a binary message format similar to Protocol Buffers and Thrift, but not like JSON or MessagePack. @@ -883,7 +883,7 @@ Deserialization is effective and usually doesn't increase the system load. See also [Format Schema](#formatschema). -## Protobuf {#protobuf} +## Protobuf { #protobuf} Protobuf - is a [Protocol Buffers](https://developers.google.com/protocol-buffers/) format. @@ -950,7 +950,7 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). -## Avro {#data-format-avro} +## Avro { #data-format-avro} [Apache Avro](http://avro.apache.org/) is a row-oriented data serialization framework developed within Apache's Hadoop project. @@ -1014,7 +1014,7 @@ Column names must: Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively. -## AvroConfluent {#data-format-avro-confluent} +## AvroConfluent { #data-format-avro-confluent} AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html). @@ -1062,7 +1062,7 @@ SELECT * FROM topic1_stream; Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it's value after a restart. -## Parquet {#data-format-parquet} +## Parquet { #data-format-parquet} [Apache Parquet](http://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for this format. 
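As a sketch of writing this format directly from SQL (the table and file names are hypothetical; `INTO OUTFILE` works when the query is run from the command-line client):

```sql
SELECT *
FROM some_table
INTO OUTFILE 'export.parquet'
FORMAT Parquet
```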
@@ -1110,7 +1110,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md). -## ORC {#data-format-orc} +## ORC { #data-format-orc} [Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse. @@ -1151,7 +1151,7 @@ $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT OR To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md). -## Format Schema {#formatschema} +## Format Schema { #formatschema} The file name containing the format schema is set by the setting `format_schema`. It's required to set this setting when it is used one of the formats `Cap'n Proto` and `Protobuf`. @@ -1170,7 +1170,7 @@ in the server configuration. [Original article](https://clickhouse.tech/docs/en/interfaces/formats/) -## Skipping Errors {#skippingerrors} +## Skipping Errors { #skippingerrors} Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and [input_format_allow_errors_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 0ce700bdc54..2e4a08675cc 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -1,4 +1,4 @@ -# HTTP Interface {#http_interface} +# HTTP Interface { #http_interface} The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. @@ -261,7 +261,7 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000& Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. -### Queries with Parameters {#cli-queries-with-parameters} +### Queries with Parameters { #cli-queries-with-parameters} You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters). diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 9af7c9863dc..a52f0ebb750 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -1,4 +1,4 @@ -# Interfaces {#interfaces} +# Interfaces { #interfaces} ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security): diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 454cdb9160d..660bb80a50c 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -1,4 +1,4 @@ -# MySQL interface {#mysql_interface} +# MySQL interface { #mysql_interface} ClickHouse supports MySQL wire protocol. 
It can be enabled by [mysql_port](../operations/server_settings/settings.md#server_settings-mysql_port) setting in configuration file: ```xml diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index a01f524f3c7..d3208ad07e4 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -101,7 +101,7 @@ Features: - Refactorings. - Search and Navigation. -### Yandex DataLens {#yandex-datalens} +### Yandex DataLens { #yandex-datalens} [Yandex DataLens](https://cloud.yandex.ru/services/datalens) is a service of data visualization and analytics. diff --git a/docs/en/operations/configuration_files.md b/docs/en/operations/configuration_files.md index 69204506c2d..6723ef79e1a 100644 --- a/docs/en/operations/configuration_files.md +++ b/docs/en/operations/configuration_files.md @@ -1,4 +1,4 @@ -# Configuration Files {#configuration_files} +# Configuration Files { #configuration_files} ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index 608e7f39034..e6e1fb03cc7 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -1,4 +1,4 @@ -# Quotas {#quotas} +# Quotas { #quotas} Quotas allow you to limit resource usage over a period of time, or simply track the use of resources. Quotas are set up in the user config. This is usually 'users.xml'. diff --git a/docs/en/operations/server_settings/index.md b/docs/en/operations/server_settings/index.md index 39f3a5bb6d4..472fc41c3e6 100644 --- a/docs/en/operations/server_settings/index.md +++ b/docs/en/operations/server_settings/index.md @@ -1,4 +1,4 @@ -# Server configuration parameters {#server_settings} +# Server configuration parameters { #server_settings} This section contains descriptions of server settings that cannot be changed at the session or query level. diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 1e48b374711..1b53e8af162 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -16,7 +16,7 @@ Default value: 3600. ``` -## compression {#server-settings-compression} +## compression { #server-settings-compression} Data compression settings for [MergeTree](../table_engines/mergetree.md)-engine tables. @@ -90,7 +90,7 @@ Settings profiles are located in the file specified in the parameter `user_confi ``` -## dictionaries_config {#server_settings-dictionaries_config} +## dictionaries_config { #server_settings-dictionaries_config} The path to the config file for external dictionaries. @@ -108,7 +108,7 @@ See also "[External dictionaries](../../query_language/dicts/external_dicts.md)" ``` -## dictionaries_lazy_load {#server_settings-dictionaries_lazy_load} +## dictionaries_lazy_load { #server_settings-dictionaries_lazy_load} Lazy loading of dictionaries. @@ -125,7 +125,7 @@ The default is `true`. ``` -## format_schema_path {#server_settings-format_schema_path} +## format_schema_path { #server_settings-format_schema_path} The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format. 
@@ -136,7 +136,7 @@ The path to the directory with the schemes for the input data, such as schemas f format_schemas/ ``` -## graphite {#server_settings-graphite} +## graphite { #server_settings-graphite} Sending data to [Graphite](https://github.com/graphite-project). @@ -171,7 +171,7 @@ You can configure multiple `` clauses. For instance, you can use this ``` -## graphite_rollup {#server_settings-graphite_rollup} +## graphite_rollup { #server_settings-graphite_rollup} Settings for thinning data for Graphite. @@ -215,7 +215,7 @@ If `http_port` is specified, the openSSL configuration is ignored even if it is ``` -## http_server_default_response {#server_settings-http_server_default_response} +## http_server_default_response { #server_settings-http_server_default_response} The page that is shown by default when you access the ClickHouse HTTP(s) server. Default value is "Ok." (with a line feed at the end) @@ -230,7 +230,7 @@ Opens `https://tabix.io/` when accessing ` http://localhost: http_port`. ``` -## include_from {#server_settings-include_from} +## include_from { #server_settings-include_from} The path to the file with substitutions. @@ -268,7 +268,7 @@ Useful for breaking away from a specific network interface. example.yandex.ru ``` -## interserver_http_credentials {#server-settings-interserver_http_credentials} +## interserver_http_credentials { #server-settings-interserver_http_credentials} The username and password used to authenticate during [replication](../table_engines/replication.md) with the Replicated* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. By default, the authentication is not used. @@ -299,7 +299,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing ``` -## listen_host {#server_settings-listen_host} +## listen_host { #server_settings-listen_host} Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. @@ -311,7 +311,7 @@ Examples: ``` -## logger {#server_settings-logger} +## logger { #server_settings-logger} Logging settings. @@ -374,7 +374,7 @@ For more information, see the section "[Creating replicated tables](../../operat ``` -## mark_cache_size {#server-mark-cache-size} +## mark_cache_size { #server-mark-cache-size} Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../table_engines/mergetree.md) family. @@ -443,7 +443,7 @@ The value 0 means that you can delete all tables without any restrictions. ``` -## merge_tree {#server_settings-merge_tree} +## merge_tree { #server_settings-merge_tree} Fine tuning for tables in the [MergeTree](../table_engines/mergetree.md). @@ -458,7 +458,7 @@ For more information, see the MergeTreeSettings.h header file. ``` -## openSSL {#server_settings-openssl} +## openSSL { #server_settings-openssl} SSL client/server configuration. @@ -518,7 +518,7 @@ Keys for server/client settings: ``` -## part_log {#server_settings-part-log} +## part_log { #server_settings-part-log} Logging events that are associated with [MergeTree](../table_engines/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. 
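As a hedged illustration, once `part_log` is enabled the recorded events end up in the `system.part_log` table and can be inspected with an ordinary query (the exact column set may vary between server versions):

```sql
SELECT event_type, database, table, part_name, rows
FROM system.part_log
WHERE event_date = today()
ORDER BY event_time DESC
LIMIT 10
```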
@@ -543,7 +543,7 @@ Use the following parameters to configure logging: ``` -## path {#server_settings-path} +## path { #server_settings-path} The path to the directory containing data. @@ -557,7 +557,7 @@ The path to the directory containing data. ``` -## query_log {#server_settings-query-log} +## query_log { #server_settings-query-log} Setting for logging queries received with the [log_queries=1](../settings/settings.md) setting. @@ -583,7 +583,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q ``` -## query_thread_log {#server_settings-query-thread-log} +## query_thread_log { #server_settings-query-thread-log} Setting for logging threads of queries received with the [log_query_threads=1](../settings/settings.md#settings-log-query-threads) setting. @@ -609,7 +609,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q ``` -## trace_log {#server_settings-trace_log} +## trace_log { #server_settings-trace_log} Settings for the [trace_log](../system_tables.md#system_tables-trace_log) system table operation. @@ -662,7 +662,7 @@ The masking rules are applied on whole query (to prevent leaks of sensitive data For distributed queries each server have to be configured separately, otherwise subquries passed to other nodes will be stored without masking. -## remote_servers {#server_settings_remote_servers} +## remote_servers { #server_settings_remote_servers} Configuration of clusters used by the [Distributed](../../operations/table_engines/distributed.md) table engine and by the `cluster` table function. @@ -678,7 +678,7 @@ For the value of the `incl` attribute, see the section "[Configuration files](.. - [skip_unavailable_shards](../settings/settings.md#settings-skip_unavailable_shards) -## timezone {#server_settings-timezone} +## timezone { #server_settings-timezone} The server's time zone. @@ -693,7 +693,7 @@ The time zone is necessary for conversions between String and DateTime formats w ``` -## tcp_port {#server_settings-tcp_port} +## tcp_port { #server_settings-tcp_port} Port for communicating with clients over the TCP protocol. @@ -703,7 +703,7 @@ Port for communicating with clients over the TCP protocol. 9000 ``` -## tcp_port_secure {#server_settings-tcp_port_secure} +## tcp_port_secure { #server_settings-tcp_port_secure} TCP port for secure communication with clients. Use it with [OpenSSL](#server_settings-openssl) settings. @@ -717,7 +717,7 @@ Positive integer. 9440 ``` -## mysql_port {#server_settings-mysql_port} +## mysql_port { #server_settings-mysql_port} Port for communicating with clients over MySQL protocol. @@ -731,7 +731,7 @@ Example 9004 ``` -## tmp_path {#server-settings-tmp_path} +## tmp_path { #server-settings-tmp_path} Path to temporary data for processing large queries. @@ -745,7 +745,7 @@ Path to temporary data for processing large queries. ``` -## tmp_policy {#server-settings-tmp_policy} +## tmp_policy { #server-settings-tmp_policy} Policy from [`storage_configuration`](../table_engines/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files. If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is ignored. 
@@ -756,7 +756,7 @@ If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is igno - `max_data_part_size_bytes` is ignored - you must have exactly one volume in that policy -## uncompressed_cache_size {#server-settings-uncompressed_cache_size} +## uncompressed_cache_size { #server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../table_engines/mergetree.md). @@ -770,7 +770,7 @@ The uncompressed cache is advantageous for very short queries in individual case 8589934592 ``` -## user_files_path {#server_settings-user_files_path} +## user_files_path { #server_settings-user_files_path} The directory with user files. Used in the table function [file()](../../query_language/table_functions/file.md). @@ -797,7 +797,7 @@ Path to the file that contains: ``` -## zookeeper {#server-settings_zookeeper} +## zookeeper { #server-settings_zookeeper} Contains settings that allow ClickHouse to interact with a [ZooKeeper](http://zookeeper.apache.org/) cluster. @@ -848,7 +848,7 @@ This section contains the following parameters: - [Replication](../../operations/table_engines/replication.md) - [ZooKeeper Programmer's Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) -## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} +## use_minimalistic_part_header_in_zookeeper { #server-settings-use_minimalistic_part_header_in_zookeeper} Storage method for data part headers in ZooKeeper. @@ -876,14 +876,14 @@ If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../table_e **Default value:** 0. -## disable_internal_dns_cache {#server-settings-disable_internal_dns_cache} +## disable_internal_dns_cache { #server-settings-disable_internal_dns_cache} Disables the internal DNS cache. Recommended for operating ClickHouse in systems with frequently changing infrastructure such as Kubernetes. **Default value:** 0. -## dns_cache_update_period {#server-settings-dns_cache_update_period} +## dns_cache_update_period { #server-settings-dns_cache_update_period} The period of updating IP addresses stored in the ClickHouse internal DNS cache (in seconds). The update is performed asynchronously, in a separate system thread. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 3dd02876955..3f56eaaf99a 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,4 +1,4 @@ -# Settings {#settings} +# Settings { #settings} There are multiple ways to make all the settings described below. Settings are configured in layers, so each subsequent layer redefines the previous settings. diff --git a/docs/en/operations/settings/permissions_for_queries.md b/docs/en/operations/settings/permissions_for_queries.md index e6dcd490e97..4c56dd93d12 100644 --- a/docs/en/operations/settings/permissions_for_queries.md +++ b/docs/en/operations/settings/permissions_for_queries.md @@ -1,4 +1,4 @@ -# Permissions for queries {#permissions_for_queries} +# Permissions for queries { #permissions_for_queries} Queries in ClickHouse can be divided into several types: @@ -15,7 +15,7 @@ The following settings regulate user permissions by the type of query: `KILL QUERY` can be performed with any settings. -## readonly {#settings_readonly} +## readonly { #settings_readonly} Restricts permissions for read data, write data and change settings queries. 
@@ -36,7 +36,7 @@ from changing only specific settings, for details see [constraints on settings]( Default value: 0 -## allow_ddl {#settings_allow_ddl} +## allow_ddl { #settings_allow_ddl} Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries. diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 5b9db828d03..1d7ac4e5ae6 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -16,7 +16,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation ( `any (only for group_by_overflow_mode)` – Continuing aggregation for the keys that got into the set, but don't add new keys to the set. -## max_memory_usage {#settings_max_memory_usage} +## max_memory_usage { #settings_max_memory_usage} The maximum amount of RAM to use for running a query on a single server. @@ -64,7 +64,7 @@ Maximum number of bytes (uncompressed data) that can be read from a table when r What to do when the volume of data read exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_rows_to_group_by {#settings-max_rows_to_group_by} +## max_rows_to_group_by { #settings-max_rows_to_group_by} Maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating. @@ -73,7 +73,7 @@ Maximum number of unique keys received from aggregation. This setting lets you l What to do when the number of unique keys for aggregation exceeds the limit: 'throw', 'break', or 'any'. By default, throw. Using the 'any' value lets you run an approximation of GROUP BY. The quality of this approximation depends on the statistical nature of the data. -## max_bytes_before_external_group_by {#settings-max_bytes_before_external_group_by} +## max_bytes_before_external_group_by { #settings-max_bytes_before_external_group_by} Enables or disables execution of `GROUP BY` clauses in external memory. See [GROUP BY in external memory](../../query_language/select.md#select-group-by-in-external-memory). @@ -96,7 +96,7 @@ Maximum number of bytes before sorting. What to do if the number of rows received before sorting exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_result_rows {#setting-max_result_rows} +## max_result_rows { #setting-max_result_rows} Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query. @@ -223,7 +223,7 @@ Maximum number of bytes (uncompressed data) that can be passed to a remote serve What to do when the amount of data exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_rows_in_join {#settings-max_rows_in_join} +## max_rows_in_join { #settings-max_rows_in_join} Limits the number of rows in the hash table that is used when joining tables. @@ -240,7 +240,7 @@ Possible values: Default value: 0. -## max_bytes_in_join {#settings-max_bytes_in_join} +## max_bytes_in_join { #settings-max_bytes_in_join} Limits the size in bytes of the hash table used when joining tables. @@ -257,7 +257,7 @@ Possible values: Default value: 0. 
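As a hypothetical per-session sketch, both join limits can be set together with the overflow action described next:

```sql
SET max_rows_in_join = 10000000;
SET max_bytes_in_join = 1000000000;
SET join_overflow_mode = 'break';   -- return a partial result instead of throwing an exception
```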
-## join_overflow_mode {#settings-join_overflow_mode} +## join_overflow_mode { #settings-join_overflow_mode} Defines what action ClickHouse performs when any of the following join limits is reached: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b10553cbe77..4bb08485208 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -45,7 +45,7 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes. -## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries} +## fallback_to_stale_replicas_for_distributed_queries { #settings-fallback_to_stale_replicas_for_distributed_queries} Forces a query to an out-of-date replica if updated data is not available. See [Replication](../table_engines/replication.md). @@ -55,7 +55,7 @@ Used when performing `SELECT` from a distributed table that points to replicated By default, 1 (enabled). -## force_index_by_date {#settings-force_index_by_date} +## force_index_by_date { #settings-force_index_by_date} Disables query execution if the index can't be used by date. @@ -82,7 +82,7 @@ Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/func It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed. -## enable_http_compression {#settings-enable_http_compression} +## enable_http_compression { #settings-enable_http_compression} Enables or disables data compression in the response to an HTTP request. @@ -95,7 +95,7 @@ Possible values: Default value: 0. -## http_zlib_compression_level {#settings-http_zlib_compression_level} +## http_zlib_compression_level { #settings-http_zlib_compression_level} Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression). @@ -104,7 +104,7 @@ Possible values: Numbers from 1 to 9. Default value: 3. -## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress} +## http_native_compression_disable_checksumming_on_decompress { #settings-http_native_compression_disable_checksumming_on_decompress} Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`). @@ -117,7 +117,7 @@ Possible values: Default value: 0. -## send_progress_in_http_headers {#settings-send_progress_in_http_headers} +## send_progress_in_http_headers { #settings-send_progress_in_http_headers} Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses. @@ -130,7 +130,7 @@ Possible values: Default value: 0. -## max_http_get_redirects {#setting-max_http_get_redirects} +## max_http_get_redirects { #setting-max_http_get_redirects} Limits the maximum number of HTTP GET redirect hops for [URL](../table_engines/url.md)-engine tables. The setting applies to both types of tables: those created by the [CREATE TABLE](../../query_language/create/#create-table-query) query and by the [url](../../query_language/table_functions/url.md) table function. @@ -141,7 +141,7 @@ Possible values: Default value: 0. 
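A hypothetical sketch (the URL and structure are illustrative): allowing a few redirect hops for a read through the `url` table function:

```sql
SET max_http_get_redirects = 5;

SELECT *
FROM url('http://example.com/data.csv', CSV, 'id UInt32, name String')
LIMIT 10
```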
-## input_format_allow_errors_num {#settings-input_format_allow_errors_num} +## input_format_allow_errors_num { #settings-input_format_allow_errors_num} Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). @@ -153,7 +153,7 @@ If an error occurred while reading rows but the error counter is still less than If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. -## input_format_allow_errors_ratio {#settings-input_format_allow_errors_ratio} +## input_format_allow_errors_ratio { #settings-input_format_allow_errors_ratio} Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). The percentage of errors is set as a floating-point number between 0 and 1. @@ -167,7 +167,7 @@ If an error occurred while reading rows but the error counter is still less than If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. -## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} +## input_format_values_interpret_expressions { #settings-input_format_values_interpret_expressions} Enables or disables the full SQL parser if the fast stream parser can't parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../query_language/syntax.md) section. @@ -217,7 +217,7 @@ INSERT INTO datetime_t SELECT now() Ok. ``` -## input_format_values_deduce_templates_of_expressions {#settings-input_format_values_deduce_templates_of_expressions} +## input_format_values_deduce_templates_of_expressions { #settings-input_format_values_deduce_templates_of_expressions} Enables or disables template deduction for an SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows to parse and interpret expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce template of an expression, parse the following rows using this template and evaluate the expression on batch of successfully parsed rows. For the following query: @@ -231,7 +231,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( Enabled by default. -## input_format_values_accurate_types_of_literals {#settings-input_format_values_accurate_types_of_literals} +## input_format_values_accurate_types_of_literals { #settings-input_format_values_accurate_types_of_literals} This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen, that expressions for some column have the same structure, but contain numeric literals of different types, e.g ```sql @@ -244,7 +244,7 @@ When this setting is enabled, ClickHouse will check actual type of literal and w When disabled, ClickHouse may use more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. Enabled by default. -## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields} +## input_format_defaults_for_omitted_fields { #session_settings-input_format_defaults_for_omitted_fields} When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. 
This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats. @@ -258,18 +258,18 @@ Possible values: Default value: 1. -## input_format_tsv_empty_as_default {#settings-input_format_tsv_empty_as_default} +## input_format_tsv_empty_as_default { #settings-input_format_tsv_empty_as_default} When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. Disabled by default. -## input_format_null_as_default {#settings-input_format_null_as_default} +## input_format_null_as_default { #settings-input_format_null_as_default} Enables or disables using default values if input data contain `NULL`, but data type of corresponding column in not `Nullable(T)` (for text input formats). -## input_format_skip_unknown_fields {#settings-input_format_skip_unknown_fields} +## input_format_skip_unknown_fields { #settings-input_format_skip_unknown_fields} Enables or disables skipping insertion of extra data. @@ -289,7 +289,7 @@ Possible values: Default value: 0. -## input_format_import_nested_json {#settings-input_format_import_nested_json} +## input_format_import_nested_json { #settings-input_format_import_nested_json} Enables or disables the insertion of JSON data with nested objects. @@ -308,7 +308,7 @@ See also: - [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format. -## input_format_with_names_use_header {#settings-input_format_with_names_use_header} +## input_format_with_names_use_header { #settings-input_format_with_names_use_header} Enables or disables checking the column order when inserting data. @@ -326,7 +326,7 @@ Possible values: Default value: 1. -## date_time_input_format {#settings-date_time_input_format} +## date_time_input_format { #settings-date_time_input_format} Allows to choose a parser of text representation of date and time. @@ -349,7 +349,7 @@ See also: - [DateTime data type.](../../data_types/datetime.md) - [Functions for working with dates and times.](../../query_language/functions/date_time_functions.md) -## join_default_strictness {#settings-join_default_strictness} +## join_default_strictness { #settings-join_default_strictness} Sets default strictness for [JOIN clauses](../../query_language/select.md#select-join). @@ -362,7 +362,7 @@ Possible values: Default value: `ALL`. -## join_any_take_last_row {#settings-join_any_take_last_row} +## join_any_take_last_row { #settings-join_any_take_last_row} Changes behavior of join operations with `ANY` strictness. @@ -382,7 +382,7 @@ See also: - [Join table engine](../table_engines/join.md) - [join_default_strictness](#settings-join_default_strictness) -## join_use_nulls {#join_use_nulls} +## join_use_nulls { #join_use_nulls} Sets the type of [JOIN](../../query_language/select.md) behavior. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting. @@ -393,7 +393,7 @@ Possible values: Default value: 0. -## max_block_size {#setting-max_block_size} +## max_block_size { #setting-max_block_size} In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of block (in number of rows) to load from tables. 
The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large, so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads, and to preserve at least some cache locality. @@ -407,7 +407,7 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block However, the block size cannot be more than `max_block_size` rows. By default: 1,000,000. It only works when reading from MergeTree engines. -## merge_tree_min_rows_for_concurrent_read {#setting-merge_tree_min_rows_for_concurrent_read} +## merge_tree_min_rows_for_concurrent_read { #setting-merge_tree_min_rows_for_concurrent_read} If the number of rows to be read from a file of a [MergeTree](../table_engines/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads. @@ -417,7 +417,7 @@ Possible values: Default value: 163840. -## merge_tree_min_bytes_for_concurrent_read {#setting-merge_tree_min_bytes_for_concurrent_read} +## merge_tree_min_bytes_for_concurrent_read { #setting-merge_tree_min_bytes_for_concurrent_read} If the number of bytes to read from one file of a [MergeTree](../table_engines/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads. @@ -427,7 +427,7 @@ Possible value: Default value: 251658240. -## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek} +## merge_tree_min_rows_for_seek { #setting-merge_tree_min_rows_for_seek} If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file, but reads the data sequentially. @@ -437,7 +437,7 @@ Possible values: Default value: 0. -## merge_tree_min_bytes_for_seek {#setting-merge_tree_min_bytes_for_seek} +## merge_tree_min_bytes_for_seek { #setting-merge_tree_min_bytes_for_seek} If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads range of file that contains both blocks, thus avoiding extra seek. @@ -448,7 +448,7 @@ Possible values: Default value: 0. -## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity} +## merge_tree_coarse_index_granularity { #setting-merge_tree_coarse_index_granularity} When searching data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively. @@ -458,7 +458,7 @@ Possible values: Default value: 8. -## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache} +## merge_tree_max_rows_to_use_cache { #setting-merge_tree_max_rows_to_use_cache} If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn't use the cache of uncompressed blocks. @@ -470,7 +470,7 @@ Possible values: Default value: 128 ✕ 8192. 
-## merge_tree_max_bytes_to_use_cache {#setting-merge_tree_max_bytes_to_use_cache} +## merge_tree_max_bytes_to_use_cache { #setting-merge_tree_max_bytes_to_use_cache} If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn't use the cache of uncompressed blocks. @@ -482,7 +482,7 @@ Possible value: Default value: 2013265920. -## min_bytes_to_use_direct_io {#settings-min_bytes_to_use_direct_io} +## min_bytes_to_use_direct_io { #settings-min_bytes_to_use_direct_io} The minimum data volume required for using direct I/O access to the storage disk. @@ -495,7 +495,7 @@ Possible values: Default value: 0. -## log_queries {#settings-log-queries} +## log_queries { #settings-log-queries} Setting up query logging. @@ -507,7 +507,7 @@ Example: log_queries=1 ``` -## log_query_threads {#settings-log-query-threads} +## log_query_threads { #settings-log-query-threads} Setting up query threads logging. @@ -519,7 +519,7 @@ Example: log_query_threads=1 ``` -## max_insert_block_size {#settings-max_insert_block_size} +## max_insert_block_size { #settings-max_insert_block_size} The size of blocks to form for insertion into a table. This setting only applies in cases when the server forms the blocks. @@ -531,7 +531,7 @@ Default value: 1,048,576. The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. -## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries} +## max_replica_delay_for_distributed_queries { #settings-max_replica_delay_for_distributed_queries} Disables lagging replicas for distributed queries. See [Replication](../../operations/table_engines/replication.md). @@ -541,7 +541,7 @@ Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. -## max_threads {#settings-max_threads} +## max_threads { #settings-max_threads} The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the 'max_distributed_connections' parameter). @@ -556,7 +556,7 @@ For queries that are completed quickly because of a LIMIT, you can set a lower ' The smaller the `max_threads` value, the less memory is consumed. -## max_insert_threads {#settings-max_insert_threads} +## max_insert_threads { #settings-max_insert_threads} The maximum number of threads to execute the `INSERT SELECT` query. @@ -590,7 +590,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v There usually isn't any reason to change this setting. -## max_query_size {#settings-max_query_size} +## max_query_size { #settings-max_query_size} The maximum part of a query that can be taken to RAM for parsing with the SQL parser. The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction. @@ -654,7 +654,7 @@ Default value: 3. Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). For more information, see the section "Extreme values". -## use_uncompressed_cache {#setting-use_uncompressed_cache} +## use_uncompressed_cache { #setting-use_uncompressed_cache} Whether to use a cache of uncompressed blocks. Accepts 0 or 1. 
By default, 0 (disabled). Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. @@ -682,7 +682,7 @@ The default value is 7500. The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance. -## load_balancing {#settings-load_balancing} +## load_balancing { #settings-load_balancing} Specifies the algorithm of replicas selection that is used for distributed query processing. @@ -693,7 +693,7 @@ ClickHouse supports the following algorithms of choosing replicas: - [In order](#load_balancing-in_order) - [First or random](#load_balancing-first_or_random) -### Random (by default) {#load_balancing-random} +### Random (by default) { #load_balancing-random} ```sql load_balancing = random @@ -702,7 +702,7 @@ load_balancing = random The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them. Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data. -### Nearest Hostname {#load_balancing-nearest_hostname} +### Nearest Hostname { #load_balancing-nearest_hostname} ```sql load_balancing = nearest_hostname @@ -716,7 +716,7 @@ This method might seem primitive, but it doesn't require external data about net Thus, if there are equivalent replicas, the closest one by name is preferred. We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results. -### In Order {#load_balancing-in_order} +### In Order { #load_balancing-in_order} ```sql load_balancing = in_order @@ -726,7 +726,7 @@ Replicas with the same number of errors are accessed in the same order as they a This method is appropriate when you know exactly which replica is preferable. -### First or Random {#load_balancing-first_or_random} +### First or Random { #load_balancing-first_or_random} ```sql load_balancing = first_or_random @@ -736,7 +736,7 @@ This algorithm chooses the first replica in the set or a random replica if the f The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, load is evenly distributed among replicas that are still available. -## prefer_localhost_replica {#settings-prefer_localhost_replica} +## prefer_localhost_replica { #settings-prefer_localhost_replica} Enables/disables preferable using the localhost replica when processing distributed queries. @@ -760,7 +760,7 @@ See the section "WITH TOTALS modifier". The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". 
-## max_parallel_replicas {#settings-max_parallel_replicas} +## max_parallel_replicas { #settings-max_parallel_replicas} The maximum number of replicas for each shard when executing a query. For consistency (to get different parts of the same data split), this option only works when the sampling key is set. @@ -782,27 +782,27 @@ If the value is 1 or more, compilation occurs asynchronously in a separate threa Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. -## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} +## output_format_json_quote_64bit_integers { #session_settings-output_format_json_quote_64bit_integers} If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. -## format_csv_delimiter {#settings-format_csv_delimiter} +## format_csv_delimiter { #settings-format_csv_delimiter} The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. -## input_format_csv_unquoted_null_literal_as_null {#settings-input_format_csv_unquoted_null_literal_as_null} +## input_format_csv_unquoted_null_literal_as_null { #settings-input_format_csv_unquoted_null_literal_as_null} For CSV input format enables or disables parsing of unquoted `NULL` as literal (synonym for `\N`). -## output_format_csv_crlf_end_of_line {#settings-output_format_csv_crlf_end_of_line} +## output_format_csv_crlf_end_of_line { #settings-output_format_csv_crlf_end_of_line} Use DOS/Windows style line separator (CRLF) in CSV instead of Unix style (LF). -## output_format_tsv_crlf_end_of_line {#settings-output_format_tsv_crlf_end_of_line} +## output_format_tsv_crlf_end_of_line { #settings-output_format_tsv_crlf_end_of_line} Use DOS/Windows style line separator (CRLF) in TSV instead of Unix style (LF). -## insert_quorum {#settings-insert_quorum} +## insert_quorum { #settings-insert_quorum} Enables quorum writes. @@ -829,7 +829,7 @@ See also: - [insert_quorum_timeout](#settings-insert_quorum_timeout) - [select_sequential_consistency](#settings-select_sequential_consistency) -## insert_quorum_timeout {#settings-insert_quorum_timeout} +## insert_quorum_timeout { #settings-insert_quorum_timeout} Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
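For illustration, a minimal sketch of how the quorum-related settings above might be combined; the table name and values are hypothetical, not taken from the patch.

```sql
-- Hypothetical ReplicatedMergeTree table; names and values are illustrative only.
SET insert_quorum = 2;                  -- the INSERT is acknowledged only after 2 replicas have the block
INSERT INTO replicated_hits VALUES (1, 'a');

SET select_sequential_consistency = 1;  -- read only data that has been written with the quorum
SELECT count() FROM replicated_hits;
```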
@@ -841,7 +841,7 @@ See also: - [select_sequential_consistency](#settings-select_sequential_consistency) -## select_sequential_consistency {#settings-select_sequential_consistency} +## select_sequential_consistency { #settings-select_sequential_consistency} Enables or disables sequential consistency for `SELECT` queries: @@ -861,7 +861,7 @@ See also: - [insert_quorum](#settings-insert_quorum) - [insert_quorum_timeout](#settings-insert_quorum_timeout) -## insert_deduplicate {#settings-insert_deduplicate} +## insert_deduplicate { #settings-insert_deduplicate} Enables or disables block deduplication of `INSERT` (for Replicated* tables). @@ -874,7 +874,7 @@ Default value: 1. By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication] (../ table_engines/replication.md). -## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate_blocks_in_dependent_materialized_views} +## deduplicate_blocks_in_dependent_materialized_views { #settings-deduplicate_blocks_in_dependent_materialized_views} Enables or disables the deduplication check for materialized views that receive data from Replicated* tables. @@ -892,7 +892,7 @@ If an INSERTed block is skipped due to deduplication in the source table, there At the same time, this behavior "breaks" `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows to change this behavior. On retry a materialized view will receive the repeat insert and will perform deduplication check by itself, ignoring check result for the source table, and will insert rows lost because of first failure. -## max_network_bytes {#settings-max_network_bytes} +## max_network_bytes { #settings-max_network_bytes} Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. Possible values: @@ -902,7 +902,7 @@ Possible values: Default value: 0. -## max_network_bandwidth {#settings-max_network_bandwidth} +## max_network_bandwidth { #settings-max_network_bandwidth} Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query. @@ -913,7 +913,7 @@ Possible values: Default value: 0. -## max_network_bandwidth_for_user {#settings-max_network_bandwidth_for_user} +## max_network_bandwidth_for_user { #settings-max_network_bandwidth_for_user} Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user. @@ -924,7 +924,7 @@ Possible values: Default value: 0. -## max_network_bandwidth_for_all_users {#settings-max_network_bandwidth_for_all_users} +## max_network_bandwidth_for_all_users { #settings-max_network_bandwidth_for_all_users} Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server. @@ -935,7 +935,7 @@ Possible values: Default value: 0. 
-## count_distinct_implementation {#settings-count_distinct_implementation} +## count_distinct_implementation { #settings-count_distinct_implementation} Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../query_language/agg_functions/reference.md#agg_function-count) construction. @@ -949,7 +949,7 @@ Possible values: Default value: `uniqExact`. -## skip_unavailable_shards {#settings-skip_unavailable_shards} +## skip_unavailable_shards { #settings-skip_unavailable_shards} Enables or disables silent skipping of unavailable shards. @@ -979,13 +979,13 @@ Possible values: Default value: 0. -## optimize_skip_unused_shards {#settings-optimize_skip_unused_shards} +## optimize_skip_unused_shards { #settings-optimize_skip_unused_shards} Enables or disables skipping of unused shards for SELECT queries that have a sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by the sharding key, otherwise does nothing). Default value: 0 -## force_optimize_skip_unused_shards {#settings-force_optimize_skip_unused_shards} +## force_optimize_skip_unused_shards { #settings-force_optimize_skip_unused_shards} Enables or disables query execution if [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown. @@ -997,7 +997,7 @@ Possible values: Default value: 0 -## optimize_throw_if_noop {#setting-optimize_throw_if_noop} +## optimize_throw_if_noop { #setting-optimize_throw_if_noop} Enables or disables throwing an exception if an [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query didn't perform a merge. @@ -1011,7 +1011,7 @@ Possible values: Default value: 0. -## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} +## distributed_replica_error_half_life { #settings-distributed_replica_error_half_life} - Type: seconds - Default value: 60 seconds @@ -1023,7 +1023,7 @@ See also: - [Table engine Distributed](../../operations/table_engines/distributed.md) - [distributed_replica_error_cap](#settings-distributed_replica_error_cap) -## distributed_replica_error_cap {#settings-distributed_replica_error_cap} +## distributed_replica_error_cap { #settings-distributed_replica_error_cap} - Type: unsigned int - Default value: 1000 @@ -1035,7 +1035,7 @@ See also: - [Table engine Distributed](../../operations/table_engines/distributed.md) - [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) -## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms} +## distributed_directory_monitor_sleep_time_ms { #distributed_directory_monitor_sleep_time_ms} Base interval for the [Distributed](../table_engines/distributed.md) table engine to send data. The actual interval grows exponentially in the event of errors. @@ -1045,7 +1045,7 @@ Possible values: Default value: 100 milliseconds. -## distributed_directory_monitor_max_sleep_time_ms {#distributed_directory_monitor_max_sleep_time_ms} +## distributed_directory_monitor_max_sleep_time_ms { #distributed_directory_monitor_max_sleep_time_ms} Maximum interval for the [Distributed](../table_engines/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed_directory_monitor_sleep_time_ms](#distributed_directory_monitor_sleep_time_ms) setting.
@@ -1055,7 +1055,7 @@ Possible values: Default value: 30000 milliseconds (30 seconds). -## distributed_directory_monitor_batch_inserts {#distributed_directory_monitor_batch_inserts} +## distributed_directory_monitor_batch_inserts { #distributed_directory_monitor_batch_inserts} Enables/disables sending of inserted data in batches. @@ -1068,7 +1068,7 @@ Possible values: Default value: 0. -## os_thread_priority {#setting-os_thread_priority} +## os_thread_priority { #setting-os_thread_priority} Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. @@ -1083,7 +1083,7 @@ Lower values mean higher priority. Threads with low `nice` priority values are e Default value: 0. -## query_profiler_real_time_period_ns {#query_profiler_real_time_period_ns} +## query_profiler_real_time_period_ns { #query_profiler_real_time_period_ns} Sets the period for a real clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). Real clock timer counts wall-clock time. @@ -1106,7 +1106,7 @@ See also: - System table [trace_log](../system_tables.md#system_tables-trace_log) -## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns} +## query_profiler_cpu_time_period_ns { #query_profiler_cpu_time_period_ns} Sets the period for a CPU clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). This timer counts only CPU time. @@ -1129,7 +1129,7 @@ See also: - System table [trace_log](../system_tables.md#system_tables-trace_log) -## allow_introspection_functions {#settings-allow_introspection_functions} +## allow_introspection_functions { #settings-allow_introspection_functions} Enables or disables [introspection functions](../../query_language/functions/introspection.md) for query profiling. @@ -1159,7 +1159,7 @@ Enable order-preserving parallel parsing of data formats. Supported only for TSV The minimum chunk size in bytes, which each thread will parse in parallel. -## output_format_avro_codec {#settings-output_format_avro_codec} +## output_format_avro_codec { #settings-output_format_avro_codec} Sets the compression codec used for output Avro file. @@ -1173,7 +1173,7 @@ Possible values: Default value: `snappy` (if available) or `deflate`. -## output_format_avro_sync_interval {#settings-output_format_avro_sync_interval} +## output_format_avro_sync_interval { #settings-output_format_avro_sync_interval} Sets minimum data size (in bytes) between synchronization markers for output Avro file. @@ -1183,7 +1183,7 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) -## format_avro_schema_registry_url {#settings-format_avro_schema_registry_url} +## format_avro_schema_registry_url { #settings-format_avro_schema_registry_url} Sets Confluent Schema Registry URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 0eb1b8d67f6..763b43b1fbf 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -6,7 +6,7 @@ System tables don't have files with data on the disk or files with metadata. The System tables are read-only. They are located in the 'system' database.
-## system.asynchronous_metrics {#system_tables-asynchronous_metrics} +## system.asynchronous_metrics { #system_tables-asynchronous_metrics} Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use. @@ -92,7 +92,7 @@ The `system.columns` table contains the following columns (the column type is sh - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. -## system.contributors {#system_contributors} +## system.contributors { #system_contributors} Contains information about contributors. All contributors in random order. The order is random at query execution time. @@ -138,7 +138,7 @@ This table contains a single String column called 'name' – the name of a datab Each database that the server knows about has a corresponding entry in the table. This system table is used for implementing the `SHOW DATABASES` query. -## system.detached_parts {#system_tables-detached_parts} +## system.detached_parts { #system_tables-detached_parts} Contains information about detached parts of [MergeTree](table_engines/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION|PART](../query_language/query_language/alter/#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached). @@ -164,7 +164,7 @@ Columns: Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. -## system.events {#system_tables-events} +## system.events { #system_tables-events} Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started. @@ -243,7 +243,7 @@ Columns: - `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. - `rows_written` (UInt64) — Number of rows written. -## system.metrics {#system_tables-metrics} +## system.metrics { #system_tables-metrics} Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date. @@ -283,7 +283,7 @@ SELECT * FROM system.metrics LIMIT 10 - [system.metric_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. - [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring. -## system.metric_log {#system_tables-metric_log} +## system.metric_log { #system_tables-metric_log} Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. To turn on metrics history collection on `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with following content: @@ -356,7 +356,7 @@ This table contains a single row with a single 'dummy' UInt8 column containing t This table is used if a SELECT query doesn't specify the FROM clause.
This is similar to the DUAL table found in other DBMSs. -## system.parts {#system_tables-parts} +## system.parts { #system_tables-parts} Contains information about parts of [MergeTree](table_engines/mergetree.md) tables. @@ -406,7 +406,7 @@ Columns: - `marks_size` (`UInt64`) – Alias for `marks_bytes`. -## system.part_log {#system_tables-part-log} +## system.part_log { #system_tables-part-log} The `system.part_log` table is created only if the [part_log](server_settings/settings.md#server_settings-part-log) server setting is specified. @@ -439,7 +439,7 @@ The `system.part_log` table contains the following columns: The `system.part_log` table is created after the first inserting data to the `MergeTree` table. -## system.processes {#system_tables-processes} +## system.processes { #system_tables-processes} This system table is used for implementing the `SHOW PROCESSLIST` query. @@ -455,7 +455,7 @@ Columns: - `query` (String) – The query text. For `INSERT`, it doesn't include the data to insert. - `query_id` (String) – Query ID, if defined. -## system.text_log {#system_tables-text_log} +## system.text_log { #system_tables-text_log} Contains logging entries. Logging level which goes to this table can be limited with `text_log.level` server setting. @@ -483,7 +483,7 @@ Columns: - `source_line` (`UInt64`) - Source line from which the logging was done. -## system.query_log {#system_tables-query_log} +## system.query_log { #system_tables-query_log} Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information. @@ -569,7 +569,7 @@ When the table is deleted manually, it will be automatically created on the fly. You can specify an arbitrary partitioning key for the `system.query_log` table in the [query_log](server_settings/settings.md#server_settings-query-log) server setting (see the `partition_by` parameter). -## system.query_thread_log {#system_tables-query-thread-log} +## system.query_thread_log { #system_tables-query-thread-log} The table contains information about each query execution thread. @@ -634,7 +634,7 @@ When the table is deleted manually, it will be automatically created on the fly. You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `partition_by` parameter). -## system.trace_log {#system_tables-trace_log} +## system.trace_log { #system_tables-trace_log} Contains stack traces collected by the sampling query profiler. @@ -677,7 +677,7 @@ query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] ``` -## system.replicas {#system_tables-replicas} +## system.replicas { #system_tables-replicas} Contains information and status for replicated tables residing on the local server. This table can be used for monitoring. The table contains a row for every Replicated\* table. 
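As a sketch of how `system.replicas` can be used for monitoring; the column names and thresholds below are assumptions for illustration, not taken from the excerpt.

```sql
-- Report replicas that look unhealthy; assumed columns: is_readonly, absolute_delay,
-- active_replicas, total_replicas. The 60-second delay threshold is illustrative.
SELECT database, table, is_readonly, absolute_delay
FROM system.replicas
WHERE is_readonly OR absolute_delay > 60 OR active_replicas < total_replicas;
```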
@@ -960,7 +960,7 @@ pzxid: 987021252247 path: /clickhouse/tables/01-08/visits/replicas ``` -## system.mutations {#system_tables-mutations} +## system.mutations { #system_tables-mutations} The table contains information about [mutations](../query_language/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns: @@ -987,7 +987,7 @@ If there were problems with mutating some parts, the following columns contain a **latest_fail_reason** - The exception message that caused the most recent part mutation failure. -## system.disks {#system_tables-disks} +## system.disks { #system_tables-disks} Contains information about disks defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure). @@ -1000,7 +1000,7 @@ Columns: - `keep_free_space` ([UInt64](../data_types/int_uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. -## system.storage_policies {#system_tables-storage_policies} +## system.storage_policies { #system_tables-storage_policies} Contains information about storage policies and volumes defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure). diff --git a/docs/en/operations/table_engines/collapsingmergetree.md b/docs/en/operations/table_engines/collapsingmergetree.md index 9c50dd959ed..2c1f2b96a3c 100644 --- a/docs/en/operations/table_engines/collapsingmergetree.md +++ b/docs/en/operations/table_engines/collapsingmergetree.md @@ -1,4 +1,4 @@ -# CollapsingMergeTree {#table_engine-collapsingmergetree} +# CollapsingMergeTree { #table_engine-collapsingmergetree} The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. @@ -55,7 +55,7 @@ All of the parameters excepting `sign` have the same meaning as in `MergeTree`. -## Collapsing {#table_engine-collapsingmergetree-collapsing} +## Collapsing { #table_engine-collapsingmergetree-collapsing} ### Data @@ -103,7 +103,7 @@ Why we need 2 rows for each change read in the [Algorithm](#table_engine-collaps 2. Long growing arrays in columns reduce the efficiency of the engine due to load for writing. The more straightforward data, the higher efficiency. 3. The `SELECT` results depend strongly on the consistency of object changes history. Be accurate when preparing data for inserting. You can get unpredictable results in inconsistent data, for example, negative values for non-negative metrics such as session depth. -### Algorithm {#table_engine-collapsingmergetree-collapsing-algorithm} +### Algorithm { #table_engine-collapsingmergetree-collapsing-algorithm} When ClickHouse merges data parts, each group of consecutive rows with the same sorting key (`ORDER BY`) is reduced to not more than two rows, one with `Sign = 1` ("state" row) and another with `Sign = -1` ("cancel" row). In other words, entries collapse. diff --git a/docs/en/operations/table_engines/file.md b/docs/en/operations/table_engines/file.md index d45eb596ea1..8a0745b7635 100644 --- a/docs/en/operations/table_engines/file.md +++ b/docs/en/operations/table_engines/file.md @@ -1,4 +1,4 @@ -# File {#table_engines-file} +# File { #table_engines-file} The File table engine keeps the data in a file in one of the supported [file formats](../../interfaces/formats.md#formats) (TabSeparated, Native, etc.). 
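A minimal sketch of the File engine in use; the table definition and data are hypothetical.

```sql
-- Hypothetical table stored as a TabSeparated file in the server's data directory.
CREATE TABLE file_engine_table (name String, value UInt32) ENGINE = File(TabSeparated);

INSERT INTO file_engine_table VALUES ('one', 1), ('two', 2);

SELECT * FROM file_engine_table;
```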
diff --git a/docs/en/operations/table_engines/generate.md b/docs/en/operations/table_engines/generate.md index fd98b3c9d18..5fc176a5c65 100644 --- a/docs/en/operations/table_engines/generate.md +++ b/docs/en/operations/table_engines/generate.md @@ -1,4 +1,4 @@ -# GenerateRandom {#table_engines-generate} +# GenerateRandom { #table_engines-generate} The GenerateRandom table engine produces random data for given table schema. diff --git a/docs/en/operations/table_engines/graphitemergetree.md b/docs/en/operations/table_engines/graphitemergetree.md index c260acca315..30aedff7979 100644 --- a/docs/en/operations/table_engines/graphitemergetree.md +++ b/docs/en/operations/table_engines/graphitemergetree.md @@ -1,4 +1,4 @@ -# GraphiteMergeTree {#graphitemergetree} +# GraphiteMergeTree { #graphitemergetree} This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite. @@ -6,7 +6,7 @@ You can use any ClickHouse table engine to store the Graphite data if you don't The engine inherits properties from [MergeTree](mergetree.md). -## Creating a Table {#creating-table} +## Creating a Table { #creating-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -66,7 +66,7 @@ All of the parameters excepting `config_section` have the same meaning as in `Me - `config_section` — Name of the section in the configuration file, where are the rules of rollup set. -## Rollup configuration {#rollup-configuration} +## Rollup configuration { #rollup-configuration} The settings for rollup are defined by the [graphite_rollup](../server_settings/settings.md#server_settings-graphite_rollup) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables. @@ -77,14 +77,14 @@ required-columns patterns ``` -### Required Columns {#required-columns} +### Required Columns { #required-columns} - `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`. - `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`. - `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`. - `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`. -### Patterns {#patterns} +### Patterns { #patterns} Structure of the `patterns` section: @@ -126,7 +126,7 @@ Fields for `pattern` and `default` sections: - `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. -### Configuration Example {#configuration-example} +### Configuration Example { #configuration-example} ```xml diff --git a/docs/en/operations/table_engines/hdfs.md b/docs/en/operations/table_engines/hdfs.md index fb254f11112..56a08daefad 100644 --- a/docs/en/operations/table_engines/hdfs.md +++ b/docs/en/operations/table_engines/hdfs.md @@ -1,4 +1,4 @@ -# HDFS {#table_engines-hdfs} +# HDFS { #table_engines-hdfs} This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. 
This engine is similar to the [File](file.md) and [URL](url.md) engines, but provides Hadoop-specific features. diff --git a/docs/en/operations/table_engines/index.md b/docs/en/operations/table_engines/index.md index 4ee5eb249e4..f47457012d7 100644 --- a/docs/en/operations/table_engines/index.md +++ b/docs/en/operations/table_engines/index.md @@ -1,4 +1,4 @@ -# Table engines {#table_engines} +# Table engines { #table_engines} The table engine (type of table) determines: @@ -64,7 +64,7 @@ Engines in the family: - [Memory](memory.md) - [Buffer](buffer.md) -## Virtual columns {#table_engines-virtual_columns} +## Virtual columns { #table_engines-virtual_columns} Virtual column is an integral table engine attribute that is defined in the engine source code. diff --git a/docs/en/operations/table_engines/jdbc.md b/docs/en/operations/table_engines/jdbc.md index 2f87cd7794e..7035e6469e2 100644 --- a/docs/en/operations/table_engines/jdbc.md +++ b/docs/en/operations/table_engines/jdbc.md @@ -1,4 +1,4 @@ -# JDBC {#table_engine-jdbc} +# JDBC { #table_engine-jdbc} Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). diff --git a/docs/en/operations/table_engines/join.md b/docs/en/operations/table_engines/join.md index 4b74ac5b2af..927ede7e34a 100644 --- a/docs/en/operations/table_engines/join.md +++ b/docs/en/operations/table_engines/join.md @@ -2,7 +2,7 @@ Prepared data structure for using in [JOIN](../../query_language/select.md#select-join) operations. -## Creating a Table {#creating-a-table} +## Creating a Table { #creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -77,7 +77,7 @@ You cannot perform a `SELECT` query directly from the table. Instead, use one of - Place the table to the right side in a `JOIN` clause. - Call the [joinGet](../../query_language/functions/other_functions.md#joinget) function, which lets you extract data from the table the same way as from a dictionary. -### Limitations and Settings {#join-limitations-and-settings} +### Limitations and Settings { #join-limitations-and-settings} When creating a table, the following settings are applied: diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 5731d68a61d..d695ae8c745 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -9,7 +9,7 @@ Kafka lets you: - Process streams as they become available. -## Creating a Table {#table_engine-kafka-creating-a-table} +## Creating a Table { #table_engine-kafka-creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index e80de9def1c..3cc57327580 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -1,4 +1,4 @@ -# MergeTree {#table_engines-mergetree} +# MergeTree { #table_engines-mergetree} The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines. @@ -26,7 +26,7 @@ Main features: The [Merge](merge.md) engine does not belong to the `*MergeTree` family. 
-## Creating a Table {#table_engine-mergetree-creating-a-table} +## Creating a Table { #table_engine-mergetree-creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -50,7 +50,7 @@ For a description of parameters, see the [CREATE query description](../../query_ !!!note "Note" `INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes). -### Query Clauses {#mergetree-query-clauses} +### Query Clauses { #mergetree-query-clauses} - `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. @@ -134,7 +134,7 @@ MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID) The `MergeTree` engine is configured in the same way as in the example above for the main engine configuration method. -## Data Storage {#mergetree-data-storage} +## Data Storage { #mergetree-data-storage} A table consists of data parts sorted by primary key. @@ -146,7 +146,7 @@ Each data part is logically divided into granules. A granule is the smallest ind The granule size is restricted by the `index_granularity` and `index_granularity_bytes` settings of the table engine. The number of rows in a granule lays in the `[1, index_granularity]` range, depending on the size of the rows. The size of a granule can exceed `index_granularity_bytes` if the size of a single row is greater than the value of the setting. In this case, the size of the granule equals the size of the row. -## Primary Keys and Indexes in Queries {#primary-keys-and-indexes-in-queries} +## Primary Keys and Indexes in Queries { #primary-keys-and-indexes-in-queries} Take the `(CounterID, Date)` primary key as an example. In this case, the sorting and index can be illustrated as follows: @@ -248,7 +248,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence. -### Data Skipping Indexes (Experimental) {#table_engine-mergetree-data_skipping-indexes} +### Data Skipping Indexes (Experimental) { #table_engine-mergetree-data_skipping-indexes} The index declaration is in the columns section of the `CREATE` query. ```sql @@ -368,7 +368,7 @@ For concurrent table access, we use multi-versioning. In other words, when a tab Reading from a table is automatically parallelized. -## TTL for Columns and Tables {#table_engine-mergetree-ttl} +## TTL for Columns and Tables { #table_engine-mergetree-ttl} Determines the lifetime of values. @@ -390,7 +390,7 @@ TTL date_time + INTERVAL 1 MONTH TTL date_time + INTERVAL 15 HOUR ``` -### Column TTL {#mergetree-column-ttl} +### Column TTL { #mergetree-column-ttl} When the values in the column expire, ClickHouse replaces them with the default values for the column data type. If all the column values in the data part expire, ClickHouse deletes this column from the data part in a filesystem. @@ -429,7 +429,7 @@ ALTER TABLE example_table c String TTL d + INTERVAL 1 MONTH; ``` -### Table TTL {#mergetree-table-ttl} +### Table TTL { #mergetree-table-ttl} Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria. 
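As a sketch of the table-level TTL clause described above; the table, partitioning and interval are illustrative only.

```sql
-- Hypothetical table whose rows are removed one month after the event time.
CREATE TABLE ttl_demo
(
    d DateTime,
    a Int32
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 1 MONTH;
```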
@@ -479,7 +479,7 @@ If you perform the `SELECT` query between merges, you may get expired data. To a [Original article](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/) -## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} +## Using Multiple Block Devices for Data Storage { #table_engine-mergetree-multiple-volumes} ### Introduction @@ -496,7 +496,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be The names given to the described entities can be found in the system tables, [system.storage_policies](../system_tables.md#system_tables-storage_policies) and [system.disks](../system_tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables. -### Configuration {#table_engine-mergetree-multiple-volumes_configure} +### Configuration { #table_engine-mergetree-multiple-volumes_configure} Disks, volumes and storage policies should be declared inside the `` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory. diff --git a/docs/en/operations/table_engines/odbc.md b/docs/en/operations/table_engines/odbc.md index 8bce818d5ed..3ba3f87fe69 100644 --- a/docs/en/operations/table_engines/odbc.md +++ b/docs/en/operations/table_engines/odbc.md @@ -1,4 +1,4 @@ -# ODBC {#table_engine-odbc} +# ODBC { #table_engine-odbc} Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). diff --git a/docs/en/operations/table_engines/replication.md b/docs/en/operations/table_engines/replication.md index dbf1f491015..a9fbb435b5e 100644 --- a/docs/en/operations/table_engines/replication.md +++ b/docs/en/operations/table_engines/replication.md @@ -1,4 +1,4 @@ -# Data Replication {#table_engines-replication} +# Data Replication { #table_engines-replication} Replication is only supported for tables in the MergeTree family: @@ -73,7 +73,7 @@ You can have any number of replicas of the same data. Yandex.Metrica uses double The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error). -## Creating Replicated Tables {#creating-replicated-tables} +## Creating Replicated Tables { #creating-replicated-tables} The `Replicated` prefix is added to the table engine name. For example:`ReplicatedMergeTree`. diff --git a/docs/en/operations/table_engines/stripelog.md b/docs/en/operations/table_engines/stripelog.md index 59c918defc1..e8c3ea02111 100644 --- a/docs/en/operations/table_engines/stripelog.md +++ b/docs/en/operations/table_engines/stripelog.md @@ -4,7 +4,7 @@ This engine belongs to the family of log engines. See the common properties of l Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows). -## Creating a Table {#table_engines-stripelog-creating-a-table} +## Creating a Table { #table_engines-stripelog-creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -17,7 +17,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] See the detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query. 
-## Writing the Data {#table_engines-stripelog-writing-the-data} +## Writing the Data { #table_engines-stripelog-writing-the-data} The `StripeLog` engine stores all the columns in one file. For each `INSERT` query, ClickHouse appends the data block to the end of a table file, writing columns one by one. @@ -28,11 +28,11 @@ For each table ClickHouse writes the files: The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations. -## Reading the Data {#table_engines-stripelog-reading-the-data} +## Reading the Data { #table_engines-stripelog-reading-the-data} The file with marks allows ClickHouse to parallelize the reading of data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows. -## Example of Use {#table_engines-stripelog-example-of-use} +## Example of Use { #table_engines-stripelog-example-of-use} Creating a table: diff --git a/docs/en/operations/table_engines/summingmergetree.md b/docs/en/operations/table_engines/summingmergetree.md index bb66d1b7a22..2c2713f083f 100644 --- a/docs/en/operations/table_engines/summingmergetree.md +++ b/docs/en/operations/table_engines/summingmergetree.md @@ -86,7 +86,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key ``` -## Data Processing {#data-processing} +## Data Processing { #data-processing} When data are inserted into a table, they are saved as-is. Clickhouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data. diff --git a/docs/en/operations/table_engines/url.md b/docs/en/operations/table_engines/url.md index 60d8b5e1f2a..9666a5cb89f 100644 --- a/docs/en/operations/table_engines/url.md +++ b/docs/en/operations/table_engines/url.md @@ -1,4 +1,4 @@ -# URL(URL, Format) {#table_engines-url} +# URL(URL, Format) { #table_engines-url} Manages data on a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine. diff --git a/docs/en/operations/table_engines/versionedcollapsingmergetree.md b/docs/en/operations/table_engines/versionedcollapsingmergetree.md index 4e7747ffae1..097ffb36cd4 100644 --- a/docs/en/operations/table_engines/versionedcollapsingmergetree.md +++ b/docs/en/operations/table_engines/versionedcollapsingmergetree.md @@ -71,7 +71,7 @@ All of the parameters except `sign` and `version` have the same meaning as in `M -## Collapsing {#table_engines_versionedcollapsingmergetree} +## Collapsing { #table_engines_versionedcollapsingmergetree} ### Data @@ -119,7 +119,7 @@ To find out why we need two rows for each change, see [Algorithm](#table_engines 2. Long growing arrays in columns reduce the efficiency of the engine due to the load for writing. The more straightforward the data, the better the efficiency. 3. `SELECT` results depend strongly on the consistency of the history of object changes. Be accurate when preparing data for inserting. You can get unpredictable results with inconsistent data, such as negative values for non-negative metrics like session depth. -### Algorithm {#table_engines-versionedcollapsingmergetree-algorithm} +### Algorithm { #table_engines-versionedcollapsingmergetree-algorithm} When ClickHouse merges data parts, it deletes each pair of rows that have the same primary key and version and different `Sign`. The order of rows does not matter. 
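To make the collapsing rule above concrete, a minimal sketch; the table, key and values are hypothetical.

```sql
-- A 'state' row and its 'cancel' row share the same key and Version, so they collapse during merges.
CREATE TABLE uact
(
    UserID UInt64,
    PageViews UInt8,
    Sign Int8,
    Version UInt8
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID;

INSERT INTO uact VALUES (4324182021466249494, 5, 1, 1);
INSERT INTO uact VALUES (4324182021466249494, 5, -1, 1), (4324182021466249494, 6, 1, 2);
```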
diff --git a/docs/en/operations/table_engines/view.md b/docs/en/operations/table_engines/view.md index 8c2e1295156..ef0c93446cf 100644 --- a/docs/en/operations/table_engines/view.md +++ b/docs/en/operations/table_engines/view.md @@ -1,4 +1,4 @@ -# View {#table_engines-view} +# View { #table_engines-view} Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query). diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 3cb64627f2a..1dfa8c39e4b 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -123,9 +123,9 @@ maxClientCnxns=2000 maxSessionTimeout=60000000 # the directory where the snapshot is stored. -dataDir=/opt/zookeeper/{{ cluster['name'] }}/data +dataDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '}}' }}/data # Place the dataLogDir to a separate physical disc for better performance -dataLogDir=/opt/zookeeper/{{ cluster['name'] }}/logs +dataLogDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '}}' }}/logs autopurge.snapRetainCount=10 autopurge.purgeInterval=1 @@ -158,7 +158,7 @@ snapCount=3000000 leaderServes=yes standaloneEnabled=false -dynamicConfigFile=/etc/zookeeper-{{ cluster['name'] }}/conf/zoo.cfg.dynamic +dynamicConfigFile=/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/zoo.cfg.dynamic ``` Java version: @@ -171,7 +171,7 @@ Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) JVM parameters: ```bash -NAME=zookeeper-{{ cluster['name'] }} +NAME=zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} ZOOCFGDIR=/etc/$NAME/conf # TODO this is really ugly @@ -190,8 +190,8 @@ JAVA=/usr/bin/java ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain" ZOO_LOG4J_PROP="INFO,ROLLINGFILE" JMXLOCALONLY=false -JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ - -Xmx{{ cluster.get('xmx','1G') }} \ +JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \ + -Xmx{{ '{{' }} cluster.get('xmx','1G') {{ '}}' }} \ -Xloggc:/var/log/$NAME/zookeeper-gc.log \ -XX:+UseGCLogFileRotation \ -XX:NumberOfGCLogFiles=16 \ @@ -212,7 +212,7 @@ JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ Salt init: ```text -description "zookeeper-{{ cluster['name'] }} centralized coordination service" +description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service" start on runlevel [2345] stop on runlevel [!2345] @@ -222,19 +222,19 @@ respawn limit nofile 8192 8192 pre-start script - [ -r "/etc/zookeeper-{{ cluster['name'] }}/conf/environment" ] || exit 0 - . /etc/zookeeper-{{ cluster['name'] }}/conf/environment + [ -r "/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/environment" ] || exit 0 + . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/environment [ -d $ZOO_LOG_DIR ] || mkdir -p $ZOO_LOG_DIR chown $USER:$GROUP $ZOO_LOG_DIR end script script - . /etc/zookeeper-{{ cluster['name'] }}/conf/environment + . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/environment [ -r /etc/default/zookeeper ] && . 
/etc/default/zookeeper if [ -z "$JMXDISABLE" ]; then JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=$JMXLOCALONLY" fi - exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ cluster['name'] }} \ + exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} \ -- -cp $CLASSPATH $JAVA_OPTS -Dzookeeper.log.dir=${ZOO_LOG_DIR} \ -Dzookeeper.root.logger=${ZOO_LOG4J_PROP} $ZOOMAIN $ZOOCFG end script diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index 656a12bad2a..72b23c92b3e 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -5,7 +5,7 @@ - [Query processing](#troubleshooting-does-not-process-queries) - [Efficiency of query processing](#troubleshooting-too-slow) -## Installation {#troubleshooting-installation-errors} +## Installation { #troubleshooting-installation-errors} ### You Cannot Get Deb Packages from ClickHouse Repository With apt-get @@ -13,7 +13,7 @@ - If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package. -## Connecting to the Server {#troubleshooting-accepts-no-connections} +## Connecting to the Server { #troubleshooting-accepts-no-connections} Possible issues: @@ -120,7 +120,7 @@ Check: You might be using the wrong user name or password. -## Query Processing {#troubleshooting-does-not-process-queries} +## Query Processing { #troubleshooting-does-not-process-queries} If ClickHouse is not able to process the query, it sends an error description to the client. In the `clickhouse-client` you get a description of the error in the console. If you are using the HTTP interface, ClickHouse sends the error description in the response body. For example: @@ -133,7 +133,7 @@ If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse re You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors. -## Efficiency of Query Processing {#troubleshooting-too-slow} +## Efficiency of Query Processing { #troubleshooting-too-slow} If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries. diff --git a/docs/en/operations/utils/clickhouse-benchmark.md b/docs/en/operations/utils/clickhouse-benchmark.md index 5707158e671..7d7c1983d20 100644 --- a/docs/en/operations/utils/clickhouse-benchmark.md +++ b/docs/en/operations/utils/clickhouse-benchmark.md @@ -25,7 +25,7 @@ Then pass this file to a standard input of `clickhouse-benchmark`. clickhouse-benchmark [keys] < queries_file ``` -## Keys {#clickhouse-benchmark-keys} +## Keys { #clickhouse-benchmark-keys} - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. @@ -47,7 +47,7 @@ clickhouse-benchmark [keys] < queries_file If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. 
-## Output {#clickhouse-benchmark-output} +## Output { #clickhouse-benchmark-output} By default, `clickhouse-benchmark` reports for each `--delay` interval. @@ -90,13 +90,13 @@ In the report you can find: - Percentiles of queries execution time. -## Comparison mode {#clickhouse-benchmark-comparison-mode} +## Comparison mode { #clickhouse-benchmark-comparison-mode} `clickhouse-benchmark` can compare performances for two running ClickHouse servers. To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately. -## Example {#clickhouse-benchmark-example} +## Example { #clickhouse-benchmark-example} ```bash $ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 diff --git a/docs/en/query_language/agg_functions/combinators.md b/docs/en/query_language/agg_functions/combinators.md index 9cb6c4be4fa..0174dd5c163 100644 --- a/docs/en/query_language/agg_functions/combinators.md +++ b/docs/en/query_language/agg_functions/combinators.md @@ -1,8 +1,8 @@ -# Aggregate function combinators {#aggregate_functions_combinators} +# Aggregate function combinators { #aggregate_functions_combinators} The name of an aggregate function can have a suffix appended to it. This changes the way the aggregate function works. -## -If {#agg-functions-combinator-if} +## -If { #agg-functions-combinator-if} The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument – a condition (Uint8 type). The aggregate function processes only the rows that trigger the condition. If the condition was not triggered even once, it returns a default value (usually zeros or empty strings). @@ -10,7 +10,7 @@ Examples: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTi With conditional aggregate functions, you can calculate aggregates for several conditions at once, without using subqueries and `JOIN`s. For example, in Yandex.Metrica, conditional aggregate functions are used to implement the segment comparison functionality. -## -Array {#agg-functions-combinator-array} +## -Array { #agg-functions-combinator-array} The -Array suffix can be appended to any aggregate function. In this case, the aggregate function takes arguments of the 'Array(T)' type (arrays) instead of 'T' type arguments. If the aggregate function accepts multiple arguments, this must be arrays of equal lengths. When processing arrays, the aggregate function works like the original aggregate function across all array elements. @@ -20,7 +20,7 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all 'arr -If and -Array can be combined. However, 'Array' must come first, then 'If'. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the 'cond' argument won't be an array. 
-## -State {#agg-functions-combinator-state} +## -State { #agg-functions-combinator-state} If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. @@ -32,19 +32,19 @@ To work with these states, use: - [-Merge](#aggregate_functions_combinators_merge) combinator. - [-MergeState](#aggregate_functions_combinators_mergestate) combinator. -## -Merge {#aggregate_functions_combinators_merge} +## -Merge { #aggregate_functions_combinators_merge} If you apply this combinator, the aggregate function takes the intermediate aggregation state as an argument, combines the states to finish aggregation, and returns the resulting value. -## -MergeState {#aggregate_functions_combinators_mergestate} +## -MergeState { #aggregate_functions_combinators_mergestate} Merges the intermediate aggregation states in the same way as the -Merge combinator. However, it doesn't return the resulting value, but an intermediate aggregation state, similar to the -State combinator. -## -ForEach {#agg-functions-combinator-foreach} +## -ForEach { #agg-functions-combinator-foreach} Converts an aggregate function for tables into an aggregate function for arrays that aggregates the corresponding array items and returns an array of results. For example, `sumForEach` for the arrays `[1, 2]`, `[3, 4, 5]`and`[6, 7]`returns the result `[10, 13, 5]` after adding together the corresponding array items. -## -OrDefault {#agg-functions-combinator-ordefault} +## -OrDefault { #agg-functions-combinator-ordefault} Fills the default value of the aggregate function's return type if there is nothing to aggregate. @@ -57,7 +57,7 @@ SELECT avg(number), avgOrDefault(number) FROM numbers(0) └─────────────┴──────────────────────┘ ``` -## -OrNull {#agg-functions-combinator-ornull} +## -OrNull { #agg-functions-combinator-ornull} Fills `null` if there is nothing to aggregate. The return column will be nullable. @@ -85,7 +85,7 @@ FROM └────────────────────────────────┘ ``` -## -Resample {#agg_functions-combinator-resample} +## -Resample { #agg_functions-combinator-resample} Lets you divide data into groups, and then separately aggregates the data in those groups. Groups are created by splitting the values from one column into intervals. diff --git a/docs/en/query_language/agg_functions/index.md b/docs/en/query_language/agg_functions/index.md index 613518611f6..dec14812f04 100644 --- a/docs/en/query_language/agg_functions/index.md +++ b/docs/en/query_language/agg_functions/index.md @@ -1,4 +1,4 @@ -# Aggregate functions {#aggregate_functions} +# Aggregate functions { #aggregate_functions} Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts. 
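A minimal sketch of the `-State` / `-Merge` pattern described above, assuming a hypothetical `hits` table: the inner query keeps intermediate `uniq` states per group, and the outer query merges them into a final value.

```sql
-- Hypothetical table: hits(site_id UInt32, user_id UInt64)
SELECT uniqMerge(state) AS total_unique_users
FROM
(
    SELECT
        site_id,
        uniqState(user_id) AS state  -- intermediate AggregateFunction(uniq, UInt64) state
    FROM hits
    GROUP BY site_id
)
```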
diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md index 9a7a356d7c4..7da0bdca8b8 100644 --- a/docs/en/query_language/agg_functions/parametric_functions.md +++ b/docs/en/query_language/agg_functions/parametric_functions.md @@ -1,4 +1,4 @@ -# Parametric aggregate functions {#aggregate_functions_parametric} +# Parametric aggregate functions { #aggregate_functions_parametric} Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters – constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. @@ -71,7 +71,7 @@ FROM In this case, you should remember that you don't know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) { #function-sequencematch} Checks whether the sequence contains an event chain that matches the pattern. @@ -161,7 +161,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) { #function-sequencecount} Counts the number of event chains that matched the pattern. The function searches event chains that don't overlap. It starts to search for the next chain after the current chain is matched. @@ -219,7 +219,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t - [sequenceMatch](#function-sequencematch) -## windowFunnel {#windowfunnel} +## windowFunnel { #windowfunnel} Searches for event chains in a sliding time window and calculates the maximum number of events that occurred from the chain. @@ -309,7 +309,7 @@ Result: └───────┴───┘ ``` -## retention {#retention} +## retention { #retention} The function takes as arguments a set of conditions from 1 to 32 arguments of type `UInt8` that indicate whether a certain condition was met for the event. Any condition can be specified as an argument (as in [WHERE](../../query_language/select.md#select-where)). diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index 7c099c26580..eeeead8e69e 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -1,6 +1,6 @@ # Function Reference -## count {#agg_function-count} +## count { #agg_function-count} Counts the number of rows or not-NULL values. @@ -65,7 +65,7 @@ SELECT count(DISTINCT num) FROM t This example shows that `count(DISTINCT num)` is performed by the `uniqExact` function according to the `count_distinct_implementation` setting value. -## any(x) {#agg_function-any} +## any(x) { #agg_function-any} Selects the first encountered value. The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. @@ -277,15 +277,15 @@ num 3 ``` -## min(x) {#agg_function-min} +## min(x) { #agg_function-min} Calculates the minimum. -## max(x) {#agg_function-max} +## max(x) { #agg_function-max} Calculates the maximum. -## argMin(arg, val) {#agg_function-argMin} +## argMin(arg, val) { #agg_function-argMin} Calculates the 'arg' value for a minimal 'val' value. 
If there are several different values of 'arg' for minimal values of 'val', the first of these values encountered is output. @@ -306,12 +306,12 @@ SELECT argMin(user, salary) FROM salary └──────────────────────┘ ``` -## argMax(arg, val) {#agg_function-argMax} +## argMax(arg, val) { #agg_function-argMax} Calculates the 'arg' value for a maximum 'val' value. If there are several different values of 'arg' for maximum values of 'val', the first of these values encountered is output. -## sum(x) {#agg_function-sum} +## sum(x) { #agg_function-sum} Calculates the sum. Only works for numbers. @@ -323,7 +323,7 @@ Computes the sum of the numbers, using the same data type for the result as for Only works for numbers. -## sumMap(key, value) {#agg_functions-summap} +## sumMap(key, value) { #agg_functions-summap} Totals the 'value' array according to the keys specified in the 'key' array. The number of elements in 'key' and 'value' must be the same for each row that is totaled. @@ -451,7 +451,7 @@ The kurtosis of the given distribution. Type — [Float64](../../data_types/floa SELECT kurtSamp(value) FROM series_with_value_column ``` -## timeSeriesGroupSum(uid, timestamp, value) {#agg_function-timeseriesgroupsum} +## timeSeriesGroupSum(uid, timestamp, value) { #agg_function-timeseriesgroupsum} `timeSeriesGroupSum` can aggregate different time series that sample timestamp not alignment. It will use linear interpolation between two sample timestamp and then sum time-series together. @@ -498,7 +498,7 @@ And the result will be: [(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] ``` -## timeSeriesGroupRateSum(uid, ts, val) {#agg_function-timeseriesgroupratesum} +## timeSeriesGroupRateSum(uid, ts, val) { #agg_function-timeseriesgroupratesum} Similarly timeSeriesGroupRateSum, timeSeriesGroupRateSum will Calculate the rate of time-series and then sum rates together. Also, timestamp should be in ascend order before use this function. @@ -507,13 +507,13 @@ Use this function, the result above case will be: [(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] ``` -## avg(x) {#agg_function-avg} +## avg(x) { #agg_function-avg} Calculates the average. Only works for numbers. The result is always Float64. -## uniq {#agg_function-uniq} +## uniq { #agg_function-uniq} Calculates the approximate number of different values of the argument. @@ -549,7 +549,7 @@ We recommend using this function in almost all scenarios. - [uniqHLL12](#agg_function-uniqhll12) - [uniqExact](#agg_function-uniqexact) -## uniqCombined {#agg_function-uniqcombined} +## uniqCombined { #agg_function-uniqcombined} Calculates the approximate number of different argument values. @@ -596,11 +596,11 @@ Compared to the [uniq](#agg_function-uniq) function, the `uniqCombined`: - [uniqHLL12](#agg_function-uniqhll12) - [uniqExact](#agg_function-uniqexact) -## uniqCombined64 {#agg_function-uniqcombined64} +## uniqCombined64 { #agg_function-uniqcombined64} Same as [uniqCombined](#agg_function-uniqcombined), but uses 64-bit hash for all data types. -## uniqHLL12 {#agg_function-uniqhll12} +## uniqHLL12 { #agg_function-uniqhll12} Calculates the approximate number of different argument values, using the [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) algorithm. @@ -636,7 +636,7 @@ We don't recommend using this function. 
In most cases, use the [uniq](#agg_funct - [uniqExact](#agg_function-uniqexact) -## uniqExact {#agg_function-uniqexact} +## uniqExact { #agg_function-uniqexact} Calculates the exact number of different argument values. @@ -659,7 +659,7 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` - [uniqHLL12](#agg_function-uniqhll12) -## groupArray(x), groupArray(max_size)(x) {#agg_function-grouparray} +## groupArray(x), groupArray(max_size)(x) { #agg_function-grouparray} Creates an array of argument values. Values can be added to the array in any (indeterminate) order. @@ -684,7 +684,7 @@ Optional parameters: - The default value for substituting in empty positions. - The length of the resulting array. This allows you to receive arrays of the same size for all the aggregate keys. When using this parameter, the default value must be specified. -## groupArrayMovingSum {#agg_function-grouparraymovingsum} +## groupArrayMovingSum { #agg_function-grouparraymovingsum} Calculates the moving sum of input values. @@ -753,7 +753,7 @@ FROM t └────────────┴─────────────────────────────────┴────────────────────────┘ ``` -## groupArrayMovingAvg {#agg_function-grouparraymovingavg} +## groupArrayMovingAvg { #agg_function-grouparraymovingavg} Calculates the moving average of input values. @@ -831,7 +831,7 @@ Creates an array from different argument values. Memory consumption is the same The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`. -## quantile {#quantile} +## quantile { #quantile} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -896,7 +896,7 @@ Result: - [quantiles](#quantiles) -## quantileDeterministic {#quantiledeterministic} +## quantileDeterministic { #quantiledeterministic} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -962,7 +962,7 @@ Result: - [quantiles](#quantiles) -## quantileExact {#quantileexact} +## quantileExact { #quantileexact} Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -1014,7 +1014,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileExactWeighted {#quantileexactweighted} +## quantileExactWeighted { #quantileexactweighted} Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element. @@ -1078,7 +1078,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileTiming {#quantiletiming} +## quantileTiming { #quantiletiming} With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -1160,7 +1160,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileTimingWeighted {#quantiletimingweighted} +## quantileTimingWeighted { #quantiletimingweighted} With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence according to the weight of each sequence member. @@ -1243,7 +1243,7 @@ Result: - [quantiles](#quantiles) -## quantileTDigest {#quantiletdigest} +## quantileTDigest { #quantiletdigest} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. 
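For orientation only, a small sketch comparing the approximate and exact quantile functions discussed above on generated data:

```sql
SELECT
    quantile(0.5)(number)        AS approx_median,
    quantileTDigest(0.9)(number) AS p90_tdigest,
    quantileExact(0.9)(number)   AS p90_exact
FROM numbers(1000)
```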
@@ -1298,7 +1298,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileTDigestWeighted {#quantiletdigestweighted} +## quantileTDigestWeighted { #quantiletdigestweighted} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. @@ -1354,7 +1354,7 @@ Result: - [quantiles](#quantiles) -## median {#median} +## median { #median} The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample. @@ -1397,7 +1397,7 @@ Result: ``` -## quantiles(level1, level2, ...)(x) {#quantiles} +## quantiles(level1, level2, ...)(x) { #quantiles} All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. @@ -1462,7 +1462,7 @@ FROM ontime └─────────────────────┘ ``` -## topKWeighted {#topkweighted} +## topKWeighted { #topkweighted} Similar to `topK` but takes one additional argument of integer type - `weight`. Every value is accounted `weight` times for frequency calculation. @@ -1562,12 +1562,12 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6]) └───────────────────────────────────────────────────────────────────┘ ``` -## stochasticLinearRegression {#agg_functions-stochasticlinearregression} +## stochasticLinearRegression { #agg_functions-stochasticlinearregression} This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size and has few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). -### Parameters {#agg_functions-stochasticlinearregression-parameters} +### Parameters { #agg_functions-stochasticlinearregression-parameters} There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four - default values will be used, however good model required some parameter tuning. @@ -1581,7 +1581,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD') 4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods. -### Usage {#agg_functions-stochasticlinearregression-usage} +### Usage { #agg_functions-stochasticlinearregression-usage} `stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage we use `-State` combinator, which basically saves the state (model weights, etc). 
To predict we use function [evalMLMethod](../functions/machine_learning_functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on. @@ -1622,7 +1622,7 @@ The query will return a column of predicted values. Note that first argument of `test_data` is a table like `train_data` but may not contain target value. -### Notes {#agg_functions-stochasticlinearregression-notes} +### Notes { #agg_functions-stochasticlinearregression-notes} 1. To merge two models user may create such query: ```sql @@ -1642,12 +1642,12 @@ The query will return a column of predicted values. Note that first argument of - [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) -## stochasticLogisticRegression {#agg_functions-stochasticlogisticregression} +## stochasticLogisticRegression { #agg_functions-stochasticlogisticregression} This function implements stochastic logistic regression. It can be used for binary classification problem, supports the same custom parameters as stochasticLinearRegression and works the same way. -### Parameters {#agg_functions-stochasticlogisticregression-parameters} +### Parameters { #agg_functions-stochasticlogisticregression-parameters} Parameters are exactly the same as in stochasticLinearRegression: `learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index eeb11282f65..4bb40df4c3d 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -1,4 +1,4 @@ -## ALTER {#query_language_queries_alter} +## ALTER { #query_language_queries_alter} The `ALTER` query is only supported for `*MergeTree` tables, as well as `Merge`and`Distributed`. The query has several variations. @@ -23,7 +23,7 @@ The following actions are supported: These actions are described in detail below. -#### ADD COLUMN {#alter_add-column} +#### ADD COLUMN { #alter_add-column} ```sql ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] @@ -43,7 +43,7 @@ Example: ALTER TABLE visits ADD COLUMN browser String AFTER user_id ``` -#### DROP COLUMN {#alter_drop-column} +#### DROP COLUMN { #alter_drop-column} ```sql DROP COLUMN [IF EXISTS] name @@ -59,7 +59,7 @@ Example: ALTER TABLE visits DROP COLUMN browser ``` -#### CLEAR COLUMN {#alter_clear-column} +#### CLEAR COLUMN { #alter_clear-column} ```sql CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name @@ -75,7 +75,7 @@ Example: ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple() ``` -#### COMMENT COLUMN {#alter_comment-column} +#### COMMENT COLUMN { #alter_comment-column} ```sql COMMENT COLUMN [IF EXISTS] name 'comment' @@ -93,7 +93,7 @@ Example: ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' ``` -#### MODIFY COLUMN {#alter_modify-column} +#### MODIFY COLUMN { #alter_modify-column} ```sql MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] @@ -191,7 +191,7 @@ Constraint check *will not be executed* on existing data if it was added. All changes on replicated tables are broadcasting to ZooKeeper so will be applied on other replicas. 
-### Manipulations With Partitions and Parts {#alter_manipulations-with-partitions} +### Manipulations With Partitions and Parts { #alter_manipulations-with-partitions} The following operations with [partitions](../operations/table_engines/custom_partitioning_key.md) are available: @@ -207,7 +207,7 @@ The following operations with [partitions](../operations/table_engines/custom_pa - [FREEZE PARTITION](#alter_freeze-partition) – Creates a backup of a partition. - [FETCH PARTITION](#alter_fetch-partition) – Downloads a partition from another server. - [MOVE PARTITION|PART](#alter_move-partition) – Move partition/data part to another disk or volume. -#### DETACH PARTITION {#alter_detach-partition} +#### DETACH PARTITION { #alter_detach-partition} ```sql ALTER TABLE table_name DETACH PARTITION partition_expr @@ -227,7 +227,7 @@ After the query is executed, you can do whatever you want with the data in the ` This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../operations/system_tables.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica. -#### DROP PARTITION {#alter_drop-partition} +#### DROP PARTITION { #alter_drop-partition} ```sql ALTER TABLE table_name DROP PARTITION partition_expr @@ -239,7 +239,7 @@ Read about setting the partition expression in a section [How to specify the par The query is replicated – it deletes data on all replicas. -#### DROP DETACHED PARTITION|PART {#alter_drop-detached} +#### DROP DETACHED PARTITION|PART { #alter_drop-detached} ```sql ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr @@ -249,7 +249,7 @@ Removes the specified part or all parts of the specified partition from `detache Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). -#### ATTACH PARTITION|PART {#alter_attach-partition} +#### ATTACH PARTITION|PART { #alter_attach-partition} ``` sql ALTER TABLE table_name ATTACH PARTITION|PART partition_expr @@ -268,7 +268,7 @@ This query is replicated. The replica-initiator checks whether there is data in So you can put data to the `detached` directory on one replica, and use the `ALTER ... ATTACH` query to add it to the table on all replicas. -#### ATTACH PARTITION FROM {#alter_attach-partition-from} +#### ATTACH PARTITION FROM { #alter_attach-partition-from} ```sql ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1 @@ -281,7 +281,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key. -#### REPLACE PARTITION {#alter_replace-partition} +#### REPLACE PARTITION { #alter_replace-partition} ```sql ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1 @@ -294,7 +294,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key. 
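A short sketch of the partition operations described above, assuming a hypothetical `MergeTree` table `visits` partitioned by month:

```sql
ALTER TABLE visits DETACH PARTITION 201901;
-- the partition now sits in the `detached` directory and is invisible to queries
ALTER TABLE visits ATTACH PARTITION 201901;
```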
-#### MOVE PARTITION TO TABLE {#alter_move_to_table-partition} +#### MOVE PARTITION TO TABLE { #alter_move_to_table-partition} ``` sql ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest @@ -311,7 +311,7 @@ For the query to run successfully, the following conditions must be met: -#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition} +#### CLEAR COLUMN IN PARTITION { #alter_clear-column-partition} ```sql ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr @@ -325,7 +325,7 @@ Example: ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902 ``` -#### FREEZE PARTITION {#alter_freeze-partition} +#### FREEZE PARTITION { #alter_freeze-partition} ```sql ALTER TABLE table_name FREEZE [PARTITION partition_expr] @@ -364,7 +364,7 @@ Restoring from a backup doesn't require stopping the server. For more information about backups and restoring data, see the [Data Backup](../operations/backup.md) section. -#### CLEAR INDEX IN PARTITION {#alter_clear-index-partition} +#### CLEAR INDEX IN PARTITION { #alter_clear-index-partition} ```sql ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr @@ -372,7 +372,7 @@ ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr The query works similar to `CLEAR COLUMN`, but it resets an index instead of a column data. -#### FETCH PARTITION {#alter_fetch-partition} +#### FETCH PARTITION { #alter_fetch-partition} ```sql ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper' @@ -400,7 +400,7 @@ Before downloading, the system checks if the partition exists and the table stru Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table. -#### MOVE PARTITION|PART {#alter_move-partition} +#### MOVE PARTITION|PART { #alter_move-partition} Moves partitions or data parts to another volume or disk for `MergeTree`-engine tables. See [Using Multiple Block Devices for Data Storage](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes). @@ -421,7 +421,7 @@ ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow' ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' ``` -#### How To Set Partition Expression {#alter-how-to-specify-part-expr} +#### How To Set Partition Expression { #alter-how-to-specify-part-expr} You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: @@ -458,7 +458,7 @@ For non-replicatable tables, all `ALTER` queries are performed synchronously. Fo For `ALTER ... ATTACH|DETACH|DROP` queries, you can use the `replication_alter_partitions_sync` setting to set up waiting. Possible values: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all. -### Mutations {#alter-mutations} +### Mutations { #alter-mutations} Mutations are an ALTER query variant that allows changing or deleting rows in a table. In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. Supported for the `MergeTree` family of table engines including the engines with replication support. 
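A minimal sketch of the mutation syntax mentioned above, assuming a hypothetical table `visits` with `event_date`, `user_id` and `is_deleted` columns:

```sql
ALTER TABLE visits DELETE WHERE event_date < '2018-01-01';
ALTER TABLE visits UPDATE is_deleted = 1 WHERE user_id = 123;
```

Both statements rewrite whole data parts asynchronously rather than touching individual rows in place.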
diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md index f399eb27a4f..f29fd7fe8ae 100644 --- a/docs/en/query_language/create.md +++ b/docs/en/query_language/create.md @@ -1,6 +1,6 @@ # CREATE Queries -## CREATE DATABASE {#query_language-create-database} +## CREATE DATABASE { #query_language-create-database} Creates database. @@ -29,7 +29,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(.. By default, ClickHouse uses its own [database engine](../database_engines/index.md). -## CREATE TABLE {#create-table-query} +## CREATE TABLE { #create-table-query} The `CREATE TABLE` query can have several forms. @@ -70,7 +70,7 @@ In all cases, if `IF NOT EXISTS` is specified, the query won't return an error i There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../operations/table_engines/index.md#table_engines). -### Default Values {#create-default-values} +### Default Values { #create-default-values} The column description can specify an expression for a default value, in one of the following ways:`DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`. @@ -105,7 +105,7 @@ If you add a new column to a table but later change its default expression, the It is not possible to set default values for elements in nested data structures. -### Constraints {#constraints} +### Constraints { #constraints} Along with columns descriptions constraints could be defined: @@ -127,7 +127,7 @@ Adding large amount of constraints can negatively affect performance of big `INS Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl). -### Column Compression Codecs {#codecs} +### Column Compression Codecs { #codecs} By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../operations/server_settings/settings.md#server-settings-compression) section of a server configuration. You can also define the compression method for each individual column in the `CREATE TABLE` query. @@ -158,7 +158,7 @@ Compression is supported for the following table engines: ClickHouse supports common purpose codecs and specialized codecs. -#### Specialized Codecs {#create-query-specialized-codecs} +#### Specialized Codecs { #create-query-specialized-codecs} These codecs are designed to make compression more effective by using specific features of data. Some of these codecs don't compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. @@ -180,7 +180,7 @@ CREATE TABLE codec_example ENGINE = MergeTree() ``` -#### Common purpose codecs {#create-query-common-purpose-codecs} +#### Common purpose codecs { #create-query-common-purpose-codecs} Codecs: @@ -275,7 +275,7 @@ Views look the same as normal tables. For example, they are listed in the result There isn't a separate query for deleting views. To delete a view, use `DROP TABLE`. 
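Pulling the pieces above together, a hypothetical table definition combining a `DEFAULT` expression and per-column compression codecs might look like this (all names are made up for illustration):

```sql
CREATE TABLE example_hits
(
    event_date Date,
    url        String,
    site       String DEFAULT domain(url),  -- default value computed from another column
    hits       UInt32 CODEC(Delta, ZSTD)    -- specialized codec followed by a general-purpose one
)
ENGINE = MergeTree()
ORDER BY event_date
```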
-## CREATE DICTIONARY {#create-dictionary-query} +## CREATE DICTIONARY { #create-dictionary-query} ```sql CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name diff --git a/docs/en/query_language/dicts/external_dicts.md b/docs/en/query_language/dicts/external_dicts.md index 46733654f68..2c6f89be9e5 100644 --- a/docs/en/query_language/dicts/external_dicts.md +++ b/docs/en/query_language/dicts/external_dicts.md @@ -1,4 +1,4 @@ -# External Dictionaries {#dicts-external_dicts} +# External Dictionaries { #dicts-external_dicts} You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see "[Sources for external dictionaries](external_dicts_dict_sources.md)". @@ -37,7 +37,7 @@ You can [configure](external_dicts_dict.md) any number of dictionaries in the sa !!! attention "Attention" You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../functions/other_functions.md) function). This functionality is not related to external dictionaries. -## See also {#ext-dicts-see-also} +## See also { #ext-dicts-see-also} - [Configuring an External Dictionary](external_dicts_dict.md) - [Storing Dictionaries in Memory](external_dicts_dict_layout.md) diff --git a/docs/en/query_language/dicts/external_dicts_dict.md b/docs/en/query_language/dicts/external_dicts_dict.md index fba76199784..02b33d5c353 100644 --- a/docs/en/query_language/dicts/external_dicts_dict.md +++ b/docs/en/query_language/dicts/external_dicts_dict.md @@ -1,4 +1,4 @@ -# Configuring an External Dictionary {#dicts-external_dicts_dict} +# Configuring an External Dictionary { #dicts-external_dicts_dict} If dictionary is configured using xml file, than dictionary configuration has the following structure: diff --git a/docs/en/query_language/dicts/external_dicts_dict_layout.md b/docs/en/query_language/dicts/external_dicts_dict_layout.md index 87034a5aa14..fbb978b1859 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_layout.md +++ b/docs/en/query_language/dicts/external_dicts_dict_layout.md @@ -1,4 +1,4 @@ -# Storing Dictionaries in Memory {#dicts-external_dicts_dict_layout} +# Storing Dictionaries in Memory { #dicts-external_dicts_dict_layout} There are a variety of ways to store dictionaries in memory. @@ -79,7 +79,7 @@ or LAYOUT(FLAT()) ``` -### hashed {#dicts-external_dicts_dict_layout-hashed} +### hashed { #dicts-external_dicts_dict_layout-hashed} The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items. @@ -99,7 +99,7 @@ or LAYOUT(HASHED()) ``` -### sparse_hashed {#dicts-external_dicts_dict_layout-sparse_hashed} +### sparse_hashed { #dicts-external_dicts_dict_layout-sparse_hashed} Similar to `hashed`, but uses less memory in favor more CPU usage. diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md index b52b07b4256..091452f7484 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md @@ -1,4 +1,4 @@ -# Sources of External Dictionaries {#dicts-external_dicts_dict_sources} +# Sources of External Dictionaries { #dicts-external_dicts_dict_sources} An external dictionary can be connected from many different sources. 
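A minimal DDL-defined dictionary in the spirit of the sections above, with all names hypothetical and a ClickHouse table as the source:

```sql
CREATE DICTIONARY countries_dict
(
    id   UInt64,
    name String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' DB 'default' TABLE 'countries'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360)
```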
@@ -43,7 +43,7 @@ Types of sources (`source_type`): - [Redis](#dicts-external_dicts_dict_sources-redis) -## Local File {#dicts-external_dicts_dict_sources-local_file} +## Local File { #dicts-external_dicts_dict_sources-local_file} Example of settings: @@ -68,7 +68,7 @@ Setting fields: - `format` – The file format. All the formats described in "[Formats](../../interfaces/formats.md#formats)" are supported. -## Executable File {#dicts-external_dicts_dict_sources-executable} +## Executable File { #dicts-external_dicts_dict_sources-executable} Working with executable files depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data. @@ -95,7 +95,7 @@ Setting fields: - `format` – The file format. All the formats described in "[Formats](../../interfaces/formats.md#formats)" are supported. -## HTTP(s) {#dicts-external_dicts_dict_sources-http} +## HTTP(s) { #dicts-external_dicts_dict_sources-http} Working with an HTTP(s) server depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. @@ -146,7 +146,7 @@ Setting fields: - `value` – Value set for a specific identifiant name. -## ODBC {#dicts-external_dicts_dict_sources-odbc} +## ODBC { #dicts-external_dicts_dict_sources-odbc} You can use this method to connect any database that has an ODBC driver. @@ -390,7 +390,7 @@ LIFETIME(MIN 300 MAX 360) ## DBMS -### MySQL {#dicts-external_dicts_dict_sources-mysql} +### MySQL { #dicts-external_dicts_dict_sources-mysql} Example of settings: @@ -482,7 +482,7 @@ SOURCE(MYSQL( ``` -### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse} +### ClickHouse { #dicts-external_dicts_dict_sources-clickhouse} Example of settings: @@ -526,7 +526,7 @@ Setting fields: - `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external_dicts_dict_lifetime.md). -### MongoDB {#dicts-external_dicts_dict_sources-mongodb} +### MongoDB { #dicts-external_dicts_dict_sources-mongodb} Example of settings: @@ -566,7 +566,7 @@ Setting fields: - `collection` – Name of the collection. -### Redis {#dicts-external_dicts_dict_sources-redis} +### Redis { #dicts-external_dicts_dict_sources-redis} Example of settings: diff --git a/docs/en/query_language/dicts/external_dicts_dict_structure.md b/docs/en/query_language/dicts/external_dicts_dict_structure.md index f5a0b0b6017..925affd4a70 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_structure.md +++ b/docs/en/query_language/dicts/external_dicts_dict_structure.md @@ -43,7 +43,7 @@ Attributes are described in the query body: - `AttrName AttrType` — [Data column](external_dicts_dict_structure.md#ext_dict_structure-attributes). There can be a multiple number of attributes. -## Key {#ext_dict_structure-key} +## Key { #ext_dict_structure-key} ClickHouse supports the following types of keys: @@ -56,7 +56,7 @@ An xml structure can contain either `` or ``. DDL-query must contain si You must not describe key as an attribute. -### Numeric Key {#ext_dict-numeric-key} +### Numeric Key { #ext_dict-numeric-key} Type: `UInt64`. 
@@ -126,7 +126,7 @@ PRIMARY KEY field1, field2 For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`. -## Attributes {#ext_dict_structure-attributes} +## Attributes { #ext_dict_structure-attributes} Configuration example: diff --git a/docs/en/query_language/dicts/internal_dicts.md b/docs/en/query_language/dicts/internal_dicts.md index 7df915dd998..d0a97ca5922 100644 --- a/docs/en/query_language/dicts/internal_dicts.md +++ b/docs/en/query_language/dicts/internal_dicts.md @@ -1,4 +1,4 @@ -# Internal dictionaries {#internal_dicts} +# Internal dictionaries { #internal_dicts} ClickHouse contains a built-in feature for working with a geobase. diff --git a/docs/en/query_language/functions/arithmetic_functions.md b/docs/en/query_language/functions/arithmetic_functions.md index 55b62b404ca..6de44cfb75f 100644 --- a/docs/en/query_language/functions/arithmetic_functions.md +++ b/docs/en/query_language/functions/arithmetic_functions.md @@ -63,7 +63,7 @@ Differs from 'modulo' in that it returns zero when the divisor is zero. Calculates a number with the reverse sign. The result is always signed. -## abs(a) {#arithm_func-abs} +## abs(a) { #arithm_func-abs} Calculates the absolute value of the number (a). That is, if a < 0, it returns -a. For unsigned types it doesn't do anything. For signed integer types, it returns an unsigned number. diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 6c33b018c25..36849cdc455 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -1,18 +1,18 @@ # Functions for working with arrays -## empty {#function-empty} +## empty { #function-empty} Returns 1 for an empty array, or 0 for a non-empty array. The result type is UInt8. The function also works for strings. -## notEmpty {#function-notempty} +## notEmpty { #function-notempty} Returns 0 for an empty array, or 1 for a non-empty array. The result type is UInt8. The function also works for strings. -## length {#array_functions-length} +## length { #array_functions-length} Returns the number of items in the array. The result type is UInt64. @@ -202,7 +202,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) └──────────────────────────────────────┘ ``` -## arrayEnumerate(arr) {#array_functions-arrayenumerate} +## arrayEnumerate(arr) { #array_functions-arrayenumerate} Returns the array \[1, 2, 3, ..., length (arr) \] @@ -455,7 +455,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res Array elements set to `NULL` are handled as normal values. -## arraySort(\[func,\] arr, ...) {#array_functions-sort} +## arraySort(\[func,\] arr, ...) { #array_functions-sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -548,7 +548,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; !!! note To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. -## arrayReverseSort([func,] arr, ...) {#array_functions-reverse-sort} +## arrayReverseSort([func,] arr, ...) 
{ #array_functions-reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -648,11 +648,11 @@ If multiple arguments are passed, it counts the number of different tuples of el If you want to get a list of unique items in an array, you can use arrayReduce('groupUniqArray', arr). -## arrayJoin(arr) {#array_functions-join} +## arrayJoin(arr) { #array_functions-join} A special function. See the section ["ArrayJoin function"](array_join.md#functions_arrayjoin). -## arrayDifference {#arraydifference} +## arrayDifference { #arraydifference} Calculates the difference between adjacent array elements. Returns an array where the first element will be 0, the second is the difference between `a[1] - a[0]`, etc. The type of elements in the resulting array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). @@ -704,7 +704,7 @@ Result: └────────────────────────────────────────────┘ ``` -## arrayDistinct {#arraydistinct} +## arrayDistinct { #arraydistinct} Takes an array, returns an array containing the distinct elements only. @@ -738,7 +738,7 @@ Result: └────────────────────────────────┘ ``` -## arrayEnumerateDense(arr) {#array_functions-arrayenumeratedense} +## arrayEnumerateDense(arr) { #array_functions-arrayenumeratedense} Returns an array of the same size as the source array, indicating where each element first appears in the source array. @@ -754,7 +754,7 @@ SELECT arrayEnumerateDense([10, 20, 10, 30]) └───────────────────────────────────────┘ ``` -## arrayIntersect(arr) {#array_functions-arrayintersect} +## arrayIntersect(arr) { #array_functions-arrayintersect} Takes multiple arrays, returns an array with elements that are present in all source arrays. Elements order in the resulting array is the same as in the first array. @@ -772,7 +772,7 @@ SELECT └──────────────┴───────────┘ ``` -## arrayReduce(agg_func, arr1, ...) {#array_functions-arrayreduce} +## arrayReduce(agg_func, arr1, ...) { #array_functions-arrayreduce} Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`. @@ -814,7 +814,7 @@ SELECT arrayReduce('uniqUpTo(3)', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) └─────────────────────────────────────────────────────────────┘ ``` -## arrayReverse(arr) {#array_functions-arrayreverse} +## arrayReverse(arr) { #array_functions-arrayreverse} Returns an array of the same size as the original array containing the elements in reverse order. @@ -830,11 +830,11 @@ SELECT arrayReverse([1, 2, 3]) └─────────────────────────┘ ``` -## reverse(arr) {#array_functions-reverse} +## reverse(arr) { #array_functions-reverse} Synonym for ["arrayReverse"](#array_functions-arrayreverse) -## arrayFlatten {#arrayflatten} +## arrayFlatten { #arrayflatten} Converts an array of arrays to a flat array. 
@@ -869,7 +869,7 @@ SELECT flatten([[[1]], [[2], [3]]]) └─────────────────────────────────────────────┘ ``` -## arrayCompact {#arraycompact} +## arrayCompact { #arraycompact} Removes consecutive duplicate elements from an array. The order of result values is determined by the order in the source array. @@ -905,7 +905,7 @@ Result: └────────────────────────────────────────────┘ ``` -## arrayZip {#arrayzip} +## arrayZip { #arrayzip} Combine multiple Array type columns into one Array[Tuple(...)] column diff --git a/docs/en/query_language/functions/array_join.md b/docs/en/query_language/functions/array_join.md index 5db4b0e601e..ee040cf7b8f 100644 --- a/docs/en/query_language/functions/array_join.md +++ b/docs/en/query_language/functions/array_join.md @@ -1,4 +1,4 @@ -# arrayJoin function {#functions_arrayjoin} +# arrayJoin function { #functions_arrayjoin} This is a very unusual function. diff --git a/docs/en/query_language/functions/bit_functions.md b/docs/en/query_language/functions/bit_functions.md index 5774375a546..b184f35fdf4 100644 --- a/docs/en/query_language/functions/bit_functions.md +++ b/docs/en/query_language/functions/bit_functions.md @@ -20,7 +20,7 @@ The result type is an integer with bits equal to the maximum bits of its argumen ## bitRotateRight(a, b) -## bitTest {#bittest} +## bitTest { #bittest} Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. @@ -75,7 +75,7 @@ Result: └────────────────┘ ``` -## bitTestAll {#bittestall} +## bitTestAll { #bittestall} Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. @@ -140,7 +140,7 @@ Result: └───────────────────────────────┘ ``` -## bitTestAny {#bittestany} +## bitTestAny { #bittestany} Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. diff --git a/docs/en/query_language/functions/bitmap_functions.md b/docs/en/query_language/functions/bitmap_functions.md index 6e6edc6aa08..23bc4b51ffe 100644 --- a/docs/en/query_language/functions/bitmap_functions.md +++ b/docs/en/query_language/functions/bitmap_functions.md @@ -9,7 +9,7 @@ RoaringBitmap is wrapped into a data structure while actual storage of Bitmap ob For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring). -## bitmapBuild {#bitmap_functions-bitmapbuild} +## bitmapBuild { #bitmap_functions-bitmapbuild} Build a bitmap from unsigned integer array. @@ -56,7 +56,7 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res └─────────────┘ ``` -## bitmapSubsetInRange {#bitmap_functions-bitmapsubsetinrange} +## bitmapSubsetInRange { #bitmap_functions-bitmapsubsetinrange} Return subset in specified range (not include the range_end). @@ -82,7 +82,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11, └───────────────────┘ ``` -## bitmapSubsetLimit {#bitmapsubsetlimit} +## bitmapSubsetLimit { #bitmapsubsetlimit} Creates a subset of bitmap with n elements taken between `range_start` and `cardinality_limit`. 
@@ -120,7 +120,7 @@ Result: └───────────────────────────┘ ``` -## bitmapContains {#bitmap_functions-bitmapcontains} +## bitmapContains { #bitmap_functions-bitmapcontains} Checks whether the bitmap contains an element. diff --git a/docs/en/query_language/functions/comparison_functions.md b/docs/en/query_language/functions/comparison_functions.md index 0c4bf8324f6..7fa30f72443 100644 --- a/docs/en/query_language/functions/comparison_functions.md +++ b/docs/en/query_language/functions/comparison_functions.md @@ -17,17 +17,17 @@ Strings are compared by bytes. A shorter string is smaller than all strings that Note. Up until version 1.1.54134, signed and unsigned numbers were compared the same way as in C++. In other words, you could get an incorrect result in cases like SELECT 9223372036854775807 > -1. This behavior changed in version 1.1.54134 and is now mathematically correct. -## equals, a = b and a == b operator {#function-equals} +## equals, a = b and a == b operator { #function-equals} -## notEquals, a ! operator= b and a `<>` b {#function-notequals} +## notEquals, a ! operator= b and a `<>` b { #function-notequals} -## less, `< operator` {#function-less} +## less, `< operator` { #function-less} -## greater, `> operator` {#function-greater} +## greater, `> operator` { #function-greater} -## lessOrEquals, `<= operator` {#function-lessorequals} +## lessOrEquals, `<= operator` { #function-lessorequals} -## greaterOrEquals, `>= operator` {#function-greaterorequals} +## greaterOrEquals, `>= operator` { #function-greaterorequals} [Original article](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/) diff --git a/docs/en/query_language/functions/conditional_functions.md b/docs/en/query_language/functions/conditional_functions.md index 6822d40bb21..301e0013bd5 100644 --- a/docs/en/query_language/functions/conditional_functions.md +++ b/docs/en/query_language/functions/conditional_functions.md @@ -1,6 +1,6 @@ # Conditional functions -## if {#if} +## if { #if} Controls conditional branching. Unlike most systems, ClickHouse always evaluate both expressions `then` and `else`. @@ -88,7 +88,7 @@ WHERE isNotNull(left) AND isNotNull(right) ``` Note: `NULL` values are not used in this example, check [NULL values in conditionals](#null-values-in-conditionals) section. -## Ternary Operator {#ternary-operator} +## Ternary Operator { #ternary-operator} It works same as `if` function. diff --git a/docs/en/query_language/functions/date_time_functions.md b/docs/en/query_language/functions/date_time_functions.md index 3a3adba38a0..84366798521 100644 --- a/docs/en/query_language/functions/date_time_functions.md +++ b/docs/en/query_language/functions/date_time_functions.md @@ -62,7 +62,7 @@ Converts a date with time to a UInt8 number containing the number of the minute Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). Leap seconds are not accounted for. -## toUnixTimestamp {#to_unix_timestamp} +## toUnixTimestamp { #to_unix_timestamp} For DateTime argument: converts value to its internal numeric representation (Unix Timestamp). For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp. @@ -342,7 +342,7 @@ SELECT └──────────────────────────┴───────────────────────────────┘ ``` -## dateDiff {#datediff} +## dateDiff { #datediff} Returns the difference between two Date or DateTime values. 
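As a small usage sketch for the `dateDiff` function described above (the unit name comes first, followed by the two values):

```sql
SELECT dateDiff('hour',
                toDateTime('2018-01-01 22:00:00'),
                toDateTime('2018-01-02 23:00:00')) AS hours_between
```

With these literals the result is 25.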
@@ -401,7 +401,7 @@ For a time interval starting at 'StartTime' and continuing for 'Duration' second For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`. This is necessary for searching for pageviews in the corresponding session. -## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetime} +## formatDateTime(Time, Format\[, Timezone\]) { #formatdatetime} Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column. diff --git a/docs/en/query_language/functions/encoding_functions.md b/docs/en/query_language/functions/encoding_functions.md index 9adb69e2cf6..ed55be7488b 100644 --- a/docs/en/query_language/functions/encoding_functions.md +++ b/docs/en/query_language/functions/encoding_functions.md @@ -1,6 +1,6 @@ # Encoding functions -## char {#char} +## char { #char} Returns the string with the length as the number of passed arguments and each byte has the value of corresponding argument. Accepts multiple arguments of numeric types. If the value of argument is out of range of UInt8 data type, it is converted to UInt8 with possible rounding and overflow. @@ -60,7 +60,7 @@ Result: └───────┘ ``` -## hex {#hex} +## hex { #hex} Returns a string containing the argument's hexadecimal representation. diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md index 26e05721b6a..a0cf648327a 100644 --- a/docs/en/query_language/functions/ext_dict_functions.md +++ b/docs/en/query_language/functions/ext_dict_functions.md @@ -1,4 +1,4 @@ -# Functions for Working with External Dictionaries {#ext_dict_functions} +# Functions for Working with External Dictionaries { #ext_dict_functions} For information on connecting and configuring external dictionaries, see [External dictionaries](../dicts/external_dicts.md). @@ -111,7 +111,7 @@ dictHas('dict_name', id_expr) Type: `UInt8`. -## dictGetHierarchy {#dictgethierarchy} +## dictGetHierarchy { #dictgethierarchy} Creates an array, containing all the parents of a key in the [hierarchical dictionary](../dicts/external_dicts_dict_hierarchical.md). @@ -154,7 +154,7 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) Type: `UInt8`. -## Other functions {#ext_dict_functions-other} +## Other functions { #ext_dict_functions-other} ClickHouse supports specialized functions that convert dictionary attribute values to a specific data type regardless of the dictionary configuration. diff --git a/docs/en/query_language/functions/geo.md b/docs/en/query_language/functions/geo.md index 81e43c93ad3..df5fc87cafa 100644 --- a/docs/en/query_language/functions/geo.md +++ b/docs/en/query_language/functions/geo.md @@ -152,7 +152,7 @@ SELECT geohashDecode('ezs42') AS res └─────────────────────────────────┘ ``` -## geoToH3 {#geotoh3} +## geoToH3 { #geotoh3} Returns [H3](https://uber.github.io/h3/#/documentation/overview/introduction) point index `(lon, lat)` with specified resolution. diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index 5c51bf33b20..9bbf86db66a 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -2,7 +2,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elements. 
-## halfMD5 {#hash_functions-halfmd5} +## halfMD5 { #hash_functions-halfmd5} [Interprets](../../query_language/functions/type_conversion_functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. @@ -32,13 +32,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') └────────────────────┴────────┘ ``` -## MD5 {#hash_functions-md5} +## MD5 { #hash_functions-md5} Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16). If you don't need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the 'sipHash128' function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 {#hash_functions-siphash64} +## sipHash64 { #hash_functions-siphash64} Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value. @@ -74,7 +74,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 └──────────────────────┴────────┘ ``` -## sipHash128 {#hash_functions-siphash128} +## sipHash128 { #hash_functions-siphash128} Calculates SipHash from a string. Accepts a String-type argument. Returns FixedString(16). @@ -175,7 +175,7 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 └──────────────────────┴────────┘ ``` -## javaHash {#hash_functions-javahash} +## javaHash { #hash_functions-javahash} Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. @@ -205,7 +205,7 @@ Result: └───────────────────────────┘ ``` -## javaHashUTF16LE {#javahashutf16le} +## javaHashUTF16LE { #javahashutf16le} Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string, assuming it contains bytes representing a string in UTF-16LE encoding. @@ -241,7 +241,7 @@ Result: └──────────────────────────────────────────────────────────────┘ ``` -## hiveHash {#hash_functions-hivehash} +## hiveHash { #hash_functions-hivehash} Calculates `HiveHash` from a string. @@ -391,7 +391,7 @@ SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) └──────────────────┴─────────────────┘ ``` -## xxHash32, xxHash64 {#hash_functions-xxhash32} +## xxHash32, xxHash64 { #hash_functions-xxhash32} Calculates `xxHash` from a string. It is proposed in two flavors, 32 and 64 bits. diff --git a/docs/en/query_language/functions/higher_order_functions.md b/docs/en/query_language/functions/higher_order_functions.md index ef5fd0c398a..d5d9f025a11 100644 --- a/docs/en/query_language/functions/higher_order_functions.md +++ b/docs/en/query_language/functions/higher_order_functions.md @@ -23,7 +23,7 @@ A lambda function can't be omitted for the following functions: - [arrayFirst](#higher_order_functions-array-first) - [arrayFirstIndex](#higher_order_functions-array-first-index) -### arrayMap(func, arr1, ...) {#higher_order_functions-array-map} +### arrayMap(func, arr1, ...) 
{ #higher_order_functions-array-map} Returns an array obtained from the original application of the `func` function to each element in the `arr` array. @@ -50,7 +50,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res Note that the first argument (lambda function) can't be omitted in the `arrayMap` function. -### arrayFilter(func, arr1, ...) {#higher_order_functions-array-filter} +### arrayFilter(func, arr1, ...) { #higher_order_functions-array-filter} Returns an array containing only the elements in `arr1` for which `func` returns something other than 0. @@ -83,7 +83,7 @@ SELECT Note that the first argument (lambda function) can't be omitted in the `arrayFilter` function. -### arrayFill(func, arr1, ...) {#higher_order_functions-array-fill} +### arrayFill(func, arr1, ...) { #higher_order_functions-array-fill} Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func` returns 0. The first element of `arr1` will not be replaced. @@ -101,7 +101,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Note that the first argument (lambda function) can't be omitted in the `arrayFill` function. -### arrayReverseFill(func, arr1, ...) {#higher_order_functions-array-reverse-fill} +### arrayReverseFill(func, arr1, ...) { #higher_order_functions-array-reverse-fill} Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func` returns 0. The last element of `arr1` will not be replaced. @@ -119,7 +119,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Note that the first argument (lambda function) can't be omitted in the `arrayReverseFill` function. -### arraySplit(func, arr1, ...) {#higher_order_functions-array-split} +### arraySplit(func, arr1, ...) { #higher_order_functions-array-split} Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. @@ -137,7 +137,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the first argument (lambda function) can't be omitted in the `arraySplit` function. -### arrayReverseSplit(func, arr1, ...) {#higher_order_functions-array-reverse-split} +### arrayReverseSplit(func, arr1, ...) { #higher_order_functions-array-reverse-split} Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. @@ -155,7 +155,7 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the first argument (lambda function) can't be omitted in the `arraySplit` function. -### arrayCount(\[func,\] arr1, ...) {#higher_order_functions-array-count} +### arrayCount(\[func,\] arr1, ...) { #higher_order_functions-array-count} Returns the number of elements in the arr array for which func returns something other than 0. If 'func' is not specified, it returns the number of non-zero elements in the array. @@ -167,17 +167,17 @@ Returns 1 if there is at least one element in 'arr' for which 'func' returns som Returns 1 if 'func' returns something other than 0 for all the elements in 'arr'. Otherwise, it returns 0. -### arraySum(\[func,\] arr1, ...) {#higher_order_functions-array-sum} +### arraySum(\[func,\] arr1, ...) { #higher_order_functions-array-sum} Returns the sum of the 'func' values. 
If the function is omitted, it just returns the sum of the array elements. -### arrayFirst(func, arr1, ...) {#higher_order_functions-array-first} +### arrayFirst(func, arr1, ...) { #higher_order_functions-array-first} Returns the first element in the 'arr1' array for which 'func' returns something other than 0. Note that the first argument (lambda function) can't be omitted in the `arrayFirst` function. -### arrayFirstIndex(func, arr1, ...) {#higher_order_functions-array-first-index} +### arrayFirstIndex(func, arr1, ...) { #higher_order_functions-array-first-index} Returns the index of the first element in the 'arr1' array for which 'func' returns something other than 0. diff --git a/docs/en/query_language/functions/in_functions.md b/docs/en/query_language/functions/in_functions.md index 287d401a0a5..c9473820ae8 100644 --- a/docs/en/query_language/functions/in_functions.md +++ b/docs/en/query_language/functions/in_functions.md @@ -1,6 +1,6 @@ # Functions for implementing the IN operator -## in, notIn, globalIn, globalNotIn {#in-functions} +## in, notIn, globalIn, globalNotIn { #in-functions} See the section [IN operators](../select.md#select-in-operators). diff --git a/docs/en/query_language/functions/introspection.md b/docs/en/query_language/functions/introspection.md index 520c89feaeb..76f6fade169 100644 --- a/docs/en/query_language/functions/introspection.md +++ b/docs/en/query_language/functions/introspection.md @@ -14,7 +14,7 @@ For proper operation of introspection functions: ClickHouse saves profiler reports to the [trace_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly. -## addressToLine {#addresstoline} +## addressToLine { #addresstoline} Converts virtual memory address inside ClickHouse server process to the filename and the line number in ClickHouse source code. @@ -104,7 +104,7 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97 ``` -## addressToSymbol {#addresstosymbol} +## addressToSymbol { #addresstosymbol} Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files. @@ -201,7 +201,7 @@ start_thread clone ``` -## demangle {#demangle} +## demangle { #demangle} Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name. diff --git a/docs/en/query_language/functions/machine_learning_functions.md b/docs/en/query_language/functions/machine_learning_functions.md index 5d9983f015f..498f6ff9994 100644 --- a/docs/en/query_language/functions/machine_learning_functions.md +++ b/docs/en/query_language/functions/machine_learning_functions.md @@ -1,6 +1,6 @@ # Machine learning functions -## evalMLMethod (prediction) {#machine_learning_methods-evalmlmethod} +## evalMLMethod (prediction) { #machine_learning_methods-evalmlmethod} Prediction using fitted regression models uses `evalMLMethod` function. See link in `linearRegression`. diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index 24b6906b57e..ad0d4ff33fd 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -4,7 +4,7 @@ Returns a string with the name of the host that this function was performed on. 
For distributed processing, this is the name of the remote server host, if the function is performed on a remote server. -## FQDN {#fqdn} +## FQDN { #fqdn} Returns the fully qualified domain name. @@ -109,7 +109,7 @@ Returns a string containing the type name of the passed argument. If `NULL` is passed to the function as input, then it returns the `Nullable(Nothing)` type, which corresponds to an internal `NULL` representation in ClickHouse. -## blockSize() {#function-blocksize} +## blockSize() { #function-blocksize} Gets the size of the block. In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. @@ -137,7 +137,7 @@ Sleeps 'seconds' seconds on each row. You can specify an integer or a floating-p Returns the name of the current database. You can use this function in table engine parameters in a CREATE TABLE query where you need to specify the database. -## currentUser() {#other_function-currentuser} +## currentUser() { #other_function-currentuser} Returns the login of current user. Login of user, that initiated query, will be returned in case distibuted query. @@ -178,7 +178,7 @@ Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is not Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is infinite, otherwise 0. Note that 0 is returned for a NaN. -## ifNotFinite {#ifnotfinite} +## ifNotFinite { #ifnotfinite} Checks whether floating point value is finite. @@ -225,7 +225,7 @@ Accepts constant strings: database name, table name, and column name. Returns a The function throws an exception if the table does not exist. For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0. -## bar {#function-bar} +## bar { #function-bar} Allows building a unicode-art diagram. @@ -408,7 +408,7 @@ Returns the timezone of the server. Returns the sequence number of the data block where the row is located. -## rowNumberInBlock {#function-rownumberinblock} +## rowNumberInBlock { #function-rownumberinblock} Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. @@ -416,7 +416,7 @@ Returns the ordinal number of the row in the data block. Different data blocks a Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. -## neighbor {#neighbor} +## neighbor { #neighbor} The window function that provides access to a row at a specified offset which comes before or after the current row of a given column. @@ -527,7 +527,7 @@ Result: └────────────┴───────┴───────────┴────────────────┘ ``` -## runningDifference(x) {#other_functions-runningdifference} +## runningDifference(x) { #other_functions-runningdifference} Calculates the difference between successive row values ​​in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. @@ -772,7 +772,7 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) ``` -## replicate {#other_functions-replicate} +## replicate { #other_functions-replicate} Creates an array with a single value. @@ -809,7 +809,7 @@ Result: └───────────────────────────────┘ ``` -## filesystemAvailable {#filesystemavailable} +## filesystemAvailable { #filesystemavailable} Returns amount of remaining space on the filesystem where the files of the databases located. 
It is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for OS. @@ -841,7 +841,7 @@ Result: └─────────────────┴────────┘ ``` -## filesystemFree {#filesystemfree} +## filesystemFree { #filesystemfree} Returns total amount of the free space on the filesystem where the files of the databases located. See also `filesystemAvailable` @@ -873,7 +873,7 @@ Result: └────────────┴────────┘ ``` -## filesystemCapacity {#filesystemcapacity} +## filesystemCapacity { #filesystemcapacity} Returns the capacity of the filesystem in bytes. For evaluation, the [path](../../operations/server_settings/settings.md#server_settings-path) to the data directory must be configured. @@ -905,17 +905,17 @@ Result: └───────────┴────────┘ ``` -## finalizeAggregation {#function-finalizeaggregation} +## finalizeAggregation { #function-finalizeaggregation} Takes state of aggregate function. Returns result of aggregation (finalized state). -## runningAccumulate {#function-runningaccumulate} +## runningAccumulate { #function-runningaccumulate} Takes the states of the aggregate function and returns a column with values, are the result of the accumulation of these states for a set of block lines, from the first to the current line. For example, takes state of aggregate function (example runningAccumulate(uniqState(UserID))), and for each row of block, return result of aggregate function on merge of states of all previous rows and current row. So, result of function depends on partition of data to blocks and on order of data in block. -## joinGet {#joinget} +## joinGet { #joinget} The function lets you extract data from the table the same way as from a [dictionary](../../query_language/dicts/index.md). @@ -978,7 +978,7 @@ Result: └──────────────────────────────────────────────────┘ ``` -## modelEvaluate(model_name, ...) {#function-modelevaluate} +## modelEvaluate(model_name, ...) { #function-modelevaluate} Evaluate external model. Accepts a model name and model arguments. Returns Float64. @@ -995,7 +995,7 @@ SELECT throwIf(number = 3, 'Too many') FROM numbers(10); Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many. ``` -## identity {#identity} +## identity { #identity} Returns the same value that was used as its argument. Used for debugging and testing, allows to cancel using index, and get the query performance of a full scan. When query is analyzed for possible use of index, the analyzer doesn't look inside `identity` functions. @@ -1021,7 +1021,7 @@ Result: └──────────────┘ ``` -## randomPrintableASCII {#randomascii} +## randomPrintableASCII { #randomascii} Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 740ae3112fb..6e8bb1635d5 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -20,7 +20,7 @@ Returns the smallest round number that is greater than or equal to `x`. In every Returns the round number with largest absolute value that has an absolute value less than or equal to `x`'s. In every other way, it is the same as the 'floor' function (see above). -## round(x\[, N\]) {#rounding_functions-round} +## round(x\[, N\]) { #rounding_functions-round} Rounds a value to a specified number of decimal places. 
@@ -82,7 +82,7 @@ round(3.65, 1) = 3.6 - [roundBankers](#roundbankers) -## roundBankers {#roundbankers} +## roundBankers { #roundbankers} Rounds a number to a specified decimal position. diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 18ad0c38093..0fc305363ba 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -1,6 +1,6 @@ # Functions for working with strings -## empty {#string_functions-empty} +## empty { #string_functions-empty} Returns 1 for an empty string or 0 for a non-empty string. The result type is UInt8. @@ -85,7 +85,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') └───────────────────────┘ ``` -## repeat {#repeat} +## repeat { #repeat} Repeats a string as many times as specified and concatenates the replicated values as a single string. @@ -132,7 +132,7 @@ Reverses a sequence of Unicode code points, assuming that the string contains a ## format(pattern, s0, s1, ...) -Formatting constant pattern with the string listed in the arguments. `pattern` is a simplified Python format pattern. Format string contains "replacement fields" surrounded by curly braces `{}`. Anything that is not contained in braces is considered literal text, which is copied unchanged to the output. If you need to include a brace character in the literal text, it can be escaped by doubling: `{{` and `}}`. Field names can be numbers (starting from zero) or empty (then they are treated as consequence numbers). +Formatting constant pattern with the string listed in the arguments. `pattern` is a simplified Python format pattern. Format string contains "replacement fields" surrounded by curly braces `{}`. Anything that is not contained in braces is considered literal text, which is copied unchanged to the output. If you need to include a brace character in the literal text, it can be escaped by doubling: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are treated as consequence numbers). ```sql SELECT format('{1} {0} {1}', 'World', 'Hello') @@ -151,7 +151,7 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat {#concat} +## concat { #concat} Concatenates the strings listed in the arguments, without a separator. @@ -187,7 +187,7 @@ Result: └─────────────────────────────┘ ``` -## concatAssumeInjective {#concatassumeinjective} +## concatAssumeInjective { #concatassumeinjective} Same as [concat](#concat), the difference is that you need to ensure that `concat(s1, s2, ...) → sn` is injective, it will be used for optimization of GROUP BY. @@ -253,11 +253,11 @@ Decode base64-encoded string 's' into original string. In case of failure raises ## tryBase64Decode(s) Similar to base64Decode, but in case of error an empty string would be returned. -## endsWith(s, suffix) {#function-endswith} +## endsWith(s, suffix) { #function-endswith} Returns whether to end with the specified suffix. Returns 1 if the string ends with the specified suffix, otherwise it returns 0. -## startsWith(str, prefix) {#function-startswith} +## startsWith(str, prefix) { #function-startswith} Returns 1 whether string starts with the specified prefix, otherwise it returns 0. @@ -286,7 +286,7 @@ Result: └───────────────────────────────────┘ ``` -## trim {#trim} +## trim { #trim} Removes all specified characters from the start or end of a string. 
By default removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. @@ -324,7 +324,7 @@ Result: └───────────────────────────────────────────────┘ ``` -## trimLeft {#trimleft} +## trimLeft { #trimleft} Removes all consecutive occurrences of common whitespace (ASCII character 32) from the beginning of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). @@ -362,7 +362,7 @@ Result: └─────────────────────────────────────┘ ``` -## trimRight {#trimright} +## trimRight { #trimright} Removes all consecutive occurrences of common whitespace (ASCII character 32) from the end of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). @@ -400,7 +400,7 @@ Result: └──────────────────────────────────────┘ ``` -## trimBoth {#trimboth} +## trimBoth { #trimboth} Removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index b3db647a1b5..724d58ddf86 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -2,7 +2,7 @@ The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. -## position(haystack, needle), locate(haystack, needle) {#position} +## position(haystack, needle), locate(haystack, needle) { #position} Returns the position (in bytes) of the found substring in the string, starting from 1. @@ -64,7 +64,7 @@ Result: └───────────────────────────────┘ ``` -## positionCaseInsensitive {#positioncaseinsensitive} +## positionCaseInsensitive { #positioncaseinsensitive} The same as [position](#position) returns the position (in bytes) of the found substring in the string, starting from 1. Use the function for a case-insensitive search. @@ -104,7 +104,7 @@ Result: └───────────────────────────────────────────────────┘ ``` -## positionUTF8 {#positionutf8} +## positionUTF8 { #positionutf8} Returns the position (in Unicode points) of the found substring in the string, starting from 1. @@ -178,7 +178,7 @@ Result: └────────────────────────────────────────┘ ``` -## positionCaseInsensitiveUTF8 {#positioncaseinsensitiveutf8} +## positionCaseInsensitiveUTF8 { #positioncaseinsensitiveutf8} The same as [positionUTF8](#positionutf8), but is case-insensitive. Returns the position (in Unicode points) of the found substring in the string, starting from 1. @@ -218,7 +218,7 @@ Result: └────────────────────────────────────────────────────┘ ``` -## multiSearchAllPositions {#multiSearchAllPositions} +## multiSearchAllPositions { #multiSearchAllPositions} The same as [position](string_search_functions.md#position) but returns `Array` of positions (in bytes) of the found corresponding substrings in the string. Positions are indexed starting from 1. @@ -259,11 +259,11 @@ Result: └───────────────────────────────────────────────────────────────────┘ ``` -## multiSearchAllPositionsUTF8 {#multiSearchAllPositionsUTF8} +## multiSearchAllPositionsUTF8 { #multiSearchAllPositionsUTF8} See `multiSearchAllPositions`. 
-## multiSearchFirstPosition(haystack, [needle1, needle2, ..., needlen]) {#multiSearchFirstPosition} +## multiSearchFirstPosition(haystack, [needle1, needle2, ..., needlen]) { #multiSearchFirstPosition} The same as `position` but returns the leftmost offset of the string `haystack` that is matched to some of the needles. @@ -275,7 +275,7 @@ Returns the index `i` (starting from 1) of the leftmost found needlei For a case-insensitive search or/and in UTF-8 format use functions `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`. -## multiSearchAny(haystack, [needle1, needle2, ..., needlen]) {#function-multisearchany} +## multiSearchAny(haystack, [needle1, needle2, ..., needlen]) { #function-multisearchany} Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. @@ -336,7 +336,7 @@ Extracts a fragment of a string using a regular expression. If 'haystack' doesn' Extracts all the fragments of a string using a regular expression. If 'haystack' doesn't match the 'pattern' regex, an empty string is returned. Returns an array of strings consisting of all matches to the regex. In general, the behavior is the same as the 'extract' function (it takes the first subpattern, or the entire expression if there isn't a subpattern). -## like(haystack, pattern), haystack LIKE pattern operator {#function-like} +## like(haystack, pattern), haystack LIKE pattern operator { #function-like} Checks whether a string matches a simple regular expression. The regular expression can contain the metasymbols `%` and `_`. @@ -350,7 +350,7 @@ Use the backslash (`\`) for escaping metasymbols. See the note on escaping in th For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function. For other regular expressions, the code is the same as for the 'match' function. -## notLike(haystack, pattern), haystack NOT LIKE pattern operator {#function-notlike} +## notLike(haystack, pattern), haystack NOT LIKE pattern operator { #function-notlike} The same thing as 'like', but negative. diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index ffb757c17a2..a4ce9467cec 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -1,6 +1,6 @@ # Type Conversion Functions -## Common Issues of Numeric Conversions {#numeric-conversion-issues} +## Common Issues of Numeric Conversions { #numeric-conversion-issues} When you convert a value from one to another data type, you should remember that in common case, it is an unsafe operation that can lead to a data loss. A data loss can occur if you try to fit value from a larger data type to a smaller data type, or if you convert values between different data types. @@ -297,7 +297,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. 
-## reinterpretAsString {#type_conversion_functions-reinterpretAsString} +## reinterpretAsString { #type_conversion_functions-reinterpretAsString} This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. @@ -305,7 +305,7 @@ This function accepts a number or date or date with time, and returns a string c This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. -## CAST(x, t) {#type_conversion_function-cast} +## CAST(x, t) { #type_conversion_function-cast} Converts 'x' to the 't' data type. The syntax CAST(x AS t) is also supported. @@ -349,7 +349,7 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null └─────────────────────────────────────────┘ ``` -## toInterval(Year|Quarter|Month|Week|Day|Hour|Minute|Second) {#function-tointerval} +## toInterval(Year|Quarter|Month|Week|Day|Hour|Minute|Second) { #function-tointerval} Converts a Number type argument to an [Interval](../../data_types/special_data_types/interval.md) data type. @@ -392,7 +392,7 @@ SELECT └───────────────────────────┴──────────────────────────────┘ ``` -## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort} +## parseDateTimeBestEffort { #type_conversion_functions-parsedatetimebesteffort} Parse a number type argument to a Date or DateTime type. different from toDate and toDateTime, parseDateTimeBestEffort can progress more complex date format. diff --git a/docs/en/query_language/functions/uuid_functions.md b/docs/en/query_language/functions/uuid_functions.md index af8ba7f84f2..4e3752f8cc6 100644 --- a/docs/en/query_language/functions/uuid_functions.md +++ b/docs/en/query_language/functions/uuid_functions.md @@ -2,7 +2,7 @@ The functions for working with UUID are listed below. -## generateUUIDv4 {#uuid_function-generate} +## generateUUIDv4 { #uuid_function-generate} Generates the [UUID](../../data_types/uuid.md) of [version 4](https://tools.ietf.org/html/rfc4122#section-4.4). diff --git a/docs/en/query_language/insert_into.md b/docs/en/query_language/insert_into.md index e2bf226c298..e2a6ff3f51b 100644 --- a/docs/en/query_language/insert_into.md +++ b/docs/en/query_language/insert_into.md @@ -44,7 +44,7 @@ You can insert data separately from the query by using the command-line client o If table has [constraints](create.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped. -### Inserting The Results of `SELECT` {#insert_query_insert-select} +### Inserting The Results of `SELECT` { #insert_query_insert-select} ```sql INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md index cbdbf0318f8..74514c14f12 100644 --- a/docs/en/query_language/misc.md +++ b/docs/en/query_language/misc.md @@ -54,7 +54,7 @@ If the table is corrupted, you can copy the non-corrupted data to another table. 3. Execute the query `INSERT INTO SELECT * FROM `. This request copies the non-corrupted data from the damaged table to another table. 
Only the data before the corrupted part will be copied. 4. Restart the `clickhouse-client` to reset the `max_threads` value. -## DESCRIBE TABLE {#misc-describe-table} +## DESCRIBE TABLE { #misc-describe-table} ```sql DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] @@ -151,7 +151,7 @@ The response contains the `kill_status` column, which can take the following val A test query (`TEST`) only checks the user's rights and displays a list of queries to stop. -## KILL MUTATION {#kill-mutation} +## KILL MUTATION { #kill-mutation} ```sql KILL MUTATION [ON CLUSTER cluster] @@ -178,7 +178,7 @@ The query is useful when a mutation is stuck and cannot finish (e.g. if some fun Changes already made by the mutation are not rolled back. -## OPTIMIZE {#misc_operations-optimize} +## OPTIMIZE { #misc_operations-optimize} ```sql OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE] @@ -198,7 +198,7 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../operations/table_engin !!! warning "Warning" `OPTIMIZE` can't fix the "Too many parts" error. -## RENAME {#misc_operations-rename} +## RENAME { #misc_operations-rename} Renames one or more tables. @@ -208,7 +208,7 @@ RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). -## SET {#query-set} +## SET { #query-set} ```sql SET param = value diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md index 3c280a3a399..a7c4b40c6d9 100644 --- a/docs/en/query_language/operators.md +++ b/docs/en/query_language/operators.md @@ -65,9 +65,9 @@ Groups of operators are listed in order of priority (the higher it is in the lis `a GLOBAL NOT IN ...` – The `globalNotIn(a, b)` function. -## Operators for Working with Dates and Times {#operators-datetime} +## Operators for Working with Dates and Times { #operators-datetime} -### EXTRACT {#operator-extract} +### EXTRACT { #operator-extract} ```sql EXTRACT(part FROM date); @@ -129,7 +129,7 @@ FROM test.Orders; You can see more examples in [tests](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00619_extract.sql). -### INTERVAL {#operator-interval} +### INTERVAL { #operator-interval} Creates an [Interval](../data_types/special_data_types/interval.md)-type value that should be used in arithmetical operations with [Date](../data_types/date.md) and [DateTime](../data_types/datetime.md)-type values. @@ -182,7 +182,7 @@ Note: The conditional operator calculates the values of b and c, then checks whether condition a is met, and then returns the corresponding value. If `b` or `C` is an [arrayJoin()](functions/array_join.md#functions_arrayjoin) function, each row will be replicated regardless of the "a" condition. -## Conditional Expression {#operator_case} +## Conditional Expression { #operator_case} ```sql CASE [x] @@ -227,7 +227,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. -### IS NULL {#operator-is-null} +### IS NULL { #operator-is-null} - For [Nullable](../data_types/nullable.md) type values, the `IS NULL` operator returns: - `1`, if the value is `NULL`. 
diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index 6a19444f44e..8a6f9e4a4e1 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -93,7 +93,7 @@ FROM ``` -### FROM Clause {#select-from} +### FROM Clause { #select-from} If the FROM clause is omitted, data will be read from the `system.one` table. The `system.one` table contains exactly one row (this table fulfills the same purpose as the DUAL table found in other DBMSs). @@ -112,7 +112,7 @@ In contrast to standard SQL, a synonym does not need to be specified after a sub To execute a query, all the columns listed in the query are extracted from the appropriate table. Any columns not needed for the external query are thrown out of the subqueries. If a query does not list any columns (for example, `SELECT count() FROM t`), some column is extracted from the table anyway (the smallest one is preferred), in order to calculate the number of rows. -#### FINAL Modifier {#select-from-final} +#### FINAL Modifier { #select-from-final} Applicable when selecting data from tables from the [MergeTree](../operations/table_engines/mergetree.md)-engine family other than `GraphiteMergeTree`. When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. @@ -127,7 +127,7 @@ Queries that use `FINAL` are executed not as fast as similar queries that don't, In most cases, avoid using `FINAL`. -### SAMPLE Clause {#select-sample-clause} +### SAMPLE Clause { #select-sample-clause} The `SAMPLE` clause allows for approximated query processing. @@ -157,7 +157,7 @@ For the `SAMPLE` clause the following syntax is supported: | `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1.
The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | -#### SAMPLE k {#select-sample-k} +#### SAMPLE k { #select-sample-k} Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`. @@ -177,7 +177,7 @@ ORDER BY PageViews DESC LIMIT 1000 In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10. -#### SAMPLE n {#select-sample-n} +#### SAMPLE n { #select-sample-n} Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`. @@ -213,7 +213,7 @@ FROM visits SAMPLE 10000000 ``` -#### SAMPLE k OFFSET m {#select-sample-offset} +#### SAMPLE k OFFSET m { #select-sample-offset} Here `k` and `m` are numbers from 0 to 1. Examples are shown below. @@ -237,7 +237,7 @@ Here, a sample of 10% is taken from the second half of the data. `[----------++--------]` -### ARRAY JOIN Clause {#select-array-join-clause} +### ARRAY JOIN Clause { #select-array-join-clause} Allows executing `JOIN` with an array or nested data structure. The intent is similar to the [arrayJoin](functions/array_join.md#functions_arrayjoin) function, but its functionality is broader. @@ -504,7 +504,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; └───────┴─────┴─────┴─────────┴────────────┴─────┘ ``` -### JOIN Clause {#select-join} +### JOIN Clause { #select-join} Joins the data in the normal [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) sense. @@ -520,7 +520,7 @@ FROM The table names can be specified instead of `` and ``. This is equivalent to the `SELECT * FROM table` subquery, except in a special case when the table has the [Join](../operations/table_engines/join.md) engine – an array prepared for joining. -#### Supported Types of `JOIN` {#select-join-types} +#### Supported Types of `JOIN` { #select-join-types} - `INNER JOIN` (or `JOIN`) - `LEFT JOIN` (or `LEFT OUTER JOIN`) @@ -552,7 +552,7 @@ Don't mix these syntaxes. ClickHouse doesn't directly support syntax with commas, so we don't recommend using them. The algorithm tries to rewrite the query in terms of `CROSS JOIN` and `INNER JOIN` clauses and then proceeds to query processing. When rewriting the query, ClickHouse tries to optimize performance and memory consumption. By default, ClickHouse treats commas as an `INNER JOIN` clause and converts `INNER JOIN` to `CROSS JOIN` when the algorithm cannot guarantee that `INNER JOIN` returns the required data. -#### Strictness {#select-join-strictness} +#### Strictness { #select-join-strictness} - `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the standard `JOIN` behavior in SQL. - `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of queries with `ANY` and `ALL` keywords are the same. @@ -704,7 +704,7 @@ For `ON`, `WHERE`, and `GROUP BY` clauses: - Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. 
-### WHERE Clause {#select-where} +### WHERE Clause { #select-where} If there is a WHERE clause, it must contain an expression with the UInt8 type. This is usually an expression with comparison and logical operators. This expression will be used for filtering data before all other transformations. @@ -727,7 +727,7 @@ A query may simultaneously specify PREWHERE and WHERE. In this case, PREWHERE pr If the 'optimize_move_to_prewhere' setting is set to 1 and PREWHERE is omitted, the system uses heuristics to automatically move parts of expressions from WHERE to PREWHERE. -### GROUP BY Clause {#select-group-by-clause} +### GROUP BY Clause { #select-group-by-clause} This is one of the most important parts of a column-oriented DBMS. @@ -824,7 +824,7 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all You can use WITH TOTALS in subqueries, including subqueries in the JOIN clause (in this case, the respective total values are combined). -#### GROUP BY in External Memory {#select-group-by-in-external-memory} +#### GROUP BY in External Memory { #select-group-by-in-external-memory} You can enable dumping temporary data to the disk to restrict memory usage during `GROUP BY`. The [max_bytes_before_external_group_by](../operations/settings/settings.md#settings-max_bytes_before_external_group_by) setting determines the threshold RAM consumption for dumping `GROUP BY` temporary data to the file system. If set to 0 (the default), it is disabled. @@ -911,7 +911,7 @@ WHERE and HAVING differ in that WHERE is performed before aggregation (GROUP BY) If aggregation is not performed, HAVING can't be used. -### ORDER BY Clause {#select-order-by} +### ORDER BY Clause { #select-order-by} The ORDER BY clause contains a list of expressions, which can each be assigned DESC or ASC (the sorting direction). If the direction is not specified, ASC is assumed. ASC is sorted in ascending order, and DESC in descending order. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase` @@ -974,7 +974,7 @@ Running a query may use more memory than 'max_bytes_before_external_sort'. For t External sorting works much less effectively than sorting in RAM. -### SELECT Clause {#select-select} +### SELECT Clause { #select-select} [Expressions](syntax.md#syntax-expressions) specified in the `SELECT` clause are calculated after all the operations in the clauses described above are finished. These expressions work as if they apply to separate rows in the result. If expressions in the `SELECT` clause contain aggregate functions, then ClickHouse processes aggregate functions and expressions used as their arguments during the [GROUP BY](#select-group-by-clause) aggregation. @@ -1035,7 +1035,7 @@ In this example, `COLUMNS('a')` returns two columns: `aa` and `ab`. `COLUMNS('c' Columns that matched the `COLUMNS` expression can have different data types. If `COLUMNS` doesn't match any columns and is the only expression in `SELECT`, ClickHouse throws an exception. -### DISTINCT Clause {#select-distinct} +### DISTINCT Clause { #select-distinct} If DISTINCT is specified, only a single row will remain out of all the sets of fully matching rows in the result. The result will be the same as if GROUP BY were specified across all the fields specified in SELECT without aggregate functions. 
But there are several differences from GROUP BY: @@ -1120,7 +1120,7 @@ The structure of results (the number and type of columns) must match for the que Queries that are parts of UNION ALL can't be enclosed in brackets. ORDER BY and LIMIT are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with UNION ALL in a subquery in the FROM clause. -### INTO OUTFILE Clause {#into-outfile-clause} +### INTO OUTFILE Clause { #into-outfile-clause} Add the `INTO OUTFILE filename` clause (where filename is a string literal) to redirect query output to the specified file. In contrast to MySQL, the file is created on the client side. The query will fail if a file with the same filename already exists. @@ -1128,7 +1128,7 @@ This functionality is available in the command-line client and clickhouse-local The default output format is TabSeparated (the same as in the command-line client batch mode). -### FORMAT Clause {#format-clause} +### FORMAT Clause { #format-clause} Specify 'FORMAT format' to get data in any specified format. You can use this for convenience, or for creating dumps. @@ -1138,7 +1138,7 @@ If the FORMAT clause is omitted, the default format is used, which depends on bo When using the command-line client, data is passed to the client in an internal efficient format. The client independently interprets the FORMAT clause of the query and formats the data itself (thus relieving the network and the server from the load). -### IN Operators {#select-in-operators} +### IN Operators { #select-in-operators} The `IN`, `NOT IN`, `GLOBAL IN`, and `GLOBAL NOT IN` operators are covered separately, since their functionality is quite rich. @@ -1237,7 +1237,7 @@ FROM t_null ``` -#### Distributed Subqueries {#select-distributed-subqueries} +#### Distributed Subqueries { #select-distributed-subqueries} There are two options for IN-s with subqueries (similar to JOINs): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. diff --git a/docs/en/query_language/show.md b/docs/en/query_language/show.md index f6a9cc6865b..e51a1ef4d18 100644 --- a/docs/en/query_language/show.md +++ b/docs/en/query_language/show.md @@ -8,7 +8,7 @@ SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [F Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified object. -## SHOW DATABASES {#show-databases} +## SHOW DATABASES { #show-databases} ```sql SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] diff --git a/docs/en/query_language/syntax.md b/docs/en/query_language/syntax.md index f73d319e7b8..ae3ebc54c9d 100644 --- a/docs/en/query_language/syntax.md +++ b/docs/en/query_language/syntax.md @@ -27,7 +27,7 @@ SQL-style and C-style comments are supported. SQL-style comments: from `--` to the end of the line. The space after `--` can be omitted. Comments in C-style: from `/*` to `*/`. These comments can be multiline. Spaces are not required here, either. -## Keywords {#syntax-keywords} +## Keywords { #syntax-keywords} Keywords are case-insensitive when they correspond to: @@ -40,7 +40,7 @@ In contrast to standard SQL all other keywords (including functions names) are * Keywords are not reserved (they are just parsed as keywords in the corresponding context). If you use [identifiers](#syntax-identifiers) the same as the keywords, enclose them into quotes. 
For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. -## Identifiers {#syntax-identifiers} +## Identifiers { #syntax-identifiers} Identifiers are: @@ -75,7 +75,7 @@ For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more inf Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. -### String {#syntax-string-literal} +### String { #syntax-string-literal} Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. This means that you can use the sequences `\'`and`\\`. The value will have the [String](../data_types/string.md) type. @@ -88,7 +88,7 @@ Actually, these are not literals, but expressions with the array creation operat An array must consist of at least one item, and a tuple must have at least two items. Tuples have a special purpose for use in the `IN` clause of a `SELECT` query. Tuples can be obtained as the result of a query, but they can't be saved to a database (with the exception of [Memory](../operations/table_engines/memory.md) tables). -### NULL {#null-literal} +### NULL { #null-literal} Indicates that the value is missing. @@ -115,7 +115,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an arguments list in brackets. For more information, see the sections "Data types," "Table engines," and "CREATE". -## Expression Aliases {#syntax-expression_aliases} +## Expression Aliases { #syntax-expression_aliases} An alias is a user-defined name for an expression in a query. @@ -173,7 +173,7 @@ In this example, we declared table `t` with column `b`. Then, when selecting dat In a `SELECT` query, an asterisk can replace the expression. For more information, see the section "SELECT". -## Expressions {#syntax-expressions} +## Expressions { #syntax-expressions} An expression is a function, identifier, literal, application of an operator, expression in brackets, subquery, or asterisk. It can also contain an alias. A list of expressions is one or more expressions separated by commas. diff --git a/docs/en/query_language/system.md b/docs/en/query_language/system.md index 6bff8381f0e..7408f4d883b 100644 --- a/docs/en/query_language/system.md +++ b/docs/en/query_language/system.md @@ -1,4 +1,4 @@ -# SYSTEM Queries {#query_language-system} +# SYSTEM Queries { #query_language-system} - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) @@ -14,13 +14,13 @@ - [STOP MERGES](#query_language-system-stop-merges) - [START MERGES](#query_language-system-start-merges) -## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} +## RELOAD DICTIONARIES { #query_language-system-reload-dictionaries} Reloads all dictionaries that have been successfully loaded before. 
By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../operations/server_settings/settings.md#server_settings-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). Always returns `Ok.` regardless of the result of the dictionary update. -## RELOAD DICTIONARY dictionary_name {#query_language-system-reload-dictionary} +## RELOAD DICTIONARY dictionary_name { #query_language-system-reload-dictionary} Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT_LOADED / FAILED). Always returns `Ok.` regardless of the result of updating the dictionary. @@ -30,38 +30,38 @@ The status of the dictionary can be checked by querying the `system.dictionaries SELECT name, status FROM system.dictionaries; ``` -## DROP DNS CACHE {#query_language-system-drop-dns-cache} +## DROP DNS CACHE { #query_language-system-drop-dns-cache} Resets ClickHouse's internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters. -## DROP MARK CACHE {#query_language-system-drop-mark-cache} +## DROP MARK CACHE { #query_language-system-drop-mark-cache} Resets the mark cache. Used in development of ClickHouse and performance tests. -## FLUSH LOGS {#query_language-system-flush_logs} +## FLUSH LOGS { #query_language-system-flush_logs} Flushes buffers of log messages to system tables (e.g. system.query_log). Allows you to not wait 7.5 seconds when debugging. -## RELOAD CONFIG {#query_language-system-reload-config} +## RELOAD CONFIG { #query_language-system-reload-config} Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeeper. -## SHUTDOWN {#query_language-system-shutdown} +## SHUTDOWN { #query_language-system-shutdown} Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`) -## KILL {#query_language-system-kill} +## KILL { #query_language-system-kill} Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`) -## Managing Distributed Tables {#query_language-system-distributed} +## Managing Distributed Tables { #query_language-system-distributed} ClickHouse can manage [distributed](../operations/table_engines/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the `insert_distributed_sync` setting. -### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends} +### STOP DISTRIBUTED SENDS { #query_language-system-stop-distributed-sends} Disables background data distribution when inserting data into distributed tables. @@ -70,7 +70,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.] 
``` -### FLUSH DISTRIBUTED {#query_language-system-flush-distributed} +### FLUSH DISTRIBUTED { #query_language-system-flush-distributed} Forces ClickHouse to send data to cluster nodes synchronously. If any nodes are unavailable, ClickHouse throws an exception and stops query execution. You can retry the query until it succeeds, which will happen when all nodes are back online. @@ -79,7 +79,7 @@ SYSTEM FLUSH DISTRIBUTED [db.] ``` -### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends} +### START DISTRIBUTED SENDS { #query_language-system-start-distributed-sends} Enables background data distribution when inserting data into distributed tables. @@ -88,7 +88,7 @@ SYSTEM START DISTRIBUTED SENDS [db.] ``` -### STOP MERGES {#query_language-system-stop-merges} +### STOP MERGES { #query_language-system-stop-merges} Provides possibility to stop background merges for tables in the MergeTree family: @@ -99,7 +99,7 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name] `DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. -### START MERGES {#query_language-system-start-merges} +### START MERGES { #query_language-system-start-merges} Provides possibility to start background merges for tables in the MergeTree family: diff --git a/docs/en/query_language/table_functions/jdbc.md b/docs/en/query_language/table_functions/jdbc.md index 97f3b5af666..6f93cb2819f 100644 --- a/docs/en/query_language/table_functions/jdbc.md +++ b/docs/en/query_language/table_functions/jdbc.md @@ -1,4 +1,4 @@ -# jdbc {#table_function-jdbc} +# jdbc { #table_function-jdbc} `jdbc(jdbc_connection_uri, schema, table)` - returns table that is connected via JDBC driver. diff --git a/docs/en/query_language/table_functions/odbc.md b/docs/en/query_language/table_functions/odbc.md index d9115557f1e..17afd91a22c 100644 --- a/docs/en/query_language/table_functions/odbc.md +++ b/docs/en/query_language/table_functions/odbc.md @@ -1,4 +1,4 @@ -# odbc {#table_functions-odbc} +# odbc { #table_functions-odbc} Returns table that is connected via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). diff --git a/docs/en/roadmap.md b/docs/en/roadmap.md index 8fc91286dfe..613968b9c93 100644 --- a/docs/en/roadmap.md +++ b/docs/en/roadmap.md @@ -3,6 +3,9 @@ ## Q1 2020 - Role-based access control + +## Q2 2020 + - Integration with external authentication services - Resource pools for more precise distribution of cluster capacity between users diff --git a/docs/en/security_changelog.md b/docs/en/security_changelog.md index dbd5690499a..7e547c577f8 100644 --- a/docs/en/security_changelog.md +++ b/docs/en/security_changelog.md @@ -2,7 +2,7 @@ ### CVE-2019-15024 -Аn attacker having write access to ZooKeeper and who is able to run a custom server available from the network where ClickHouse runs, can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica will fetch data part from the malicious replica, it can force clickhouse-server to write to arbitrary path on filesystem. +Аn attacker that has write access to ZooKeeper and who ican run a custom server available from the network where ClickHouse runs, can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica will fetch data part from the malicious replica, it can force clickhouse-server to write to arbitrary path on filesystem. 
Credits: Eldar Zaitov of Yandex Information Security Team @@ -14,7 +14,7 @@ Credits: Eldar Zaitov of Yandex Information Security Team ### CVE-2019-16536 -Stack overflow leading to DoS can be triggered by malicious authenticated client. +Stack overflow leading to DoS can be triggered by a malicious authenticated client. Credits: Eldar Zaitov of Yandex Information Security Team @@ -62,7 +62,7 @@ Credits: Andrey Krasichkov and Evgeny Sidorov of Yandex Information Security Tea ### CVE-2018-14670 -Incorrect configuration in deb package could lead to unauthorized use of the database. +Incorrect configuration in deb package could lead to the unauthorized use of the database. Credits: the UK's National Cyber Security Centre (NCSC) diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index e984e8bb0b7..3d6b4099993 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -124,9 +124,9 @@ maxClientCnxns=2000 maxSessionTimeout=60000000 # the directory where the snapshot is stored. -dataDir=/opt/zookeeper/{{ cluster['name'] }}/data +dataDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/data # Place the dataLogDir to a separate physical disc for better performance -dataLogDir=/opt/zookeeper/{{ cluster['name'] }}/logs +dataLogDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/logs autopurge.snapRetainCount=10 autopurge.purgeInterval=1 @@ -159,7 +159,7 @@ snapCount=3000000 leaderServes=yes standaloneEnabled=false -dynamicConfigFile=/etc/zookeeper-{{ cluster['name'] }}/conf/zoo.cfg.dynamic +dynamicConfigFile=/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/zoo.cfg.dynamic ``` Версия Java: @@ -172,7 +172,7 @@ Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) Параметры JVM: ```bash -NAME=zookeeper-{{ cluster['name'] }} +NAME=zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }} ZOOCFGDIR=/etc/$NAME/conf # TODO this is really ugly @@ -191,8 +191,8 @@ JAVA=/usr/bin/java ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain" ZOO_LOG4J_PROP="INFO,ROLLINGFILE" JMXLOCALONLY=false -JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ - -Xmx{{ cluster.get('xmx','1G') }} \ +JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '{{' }} '}}' }} \ + -Xmx{{ '{{' }} cluster.get('xmx','1G') {{ '{{' }} '}}' }} \ -Xloggc:/var/log/$NAME/zookeeper-gc.log \ -XX:+UseGCLogFileRotation \ -XX:NumberOfGCLogFiles=16 \ @@ -213,7 +213,7 @@ JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ Salt init: ```text -description "zookeeper-{{ cluster['name'] }} centralized coordination service" +description "zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }} centralized coordination service" start on runlevel [2345] stop on runlevel [!2345] @@ -223,19 +223,19 @@ respawn limit nofile 8192 8192 pre-start script - [ -r "/etc/zookeeper-{{ cluster['name'] }}/conf/environment" ] || exit 0 - . /etc/zookeeper-{{ cluster['name'] }}/conf/environment + [ -r "/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/environment" ] || exit 0 + . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/environment [ -d $ZOO_LOG_DIR ] || mkdir -p $ZOO_LOG_DIR chown $USER:$GROUP $ZOO_LOG_DIR end script script - . /etc/zookeeper-{{ cluster['name'] }}/conf/environment + . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/environment [ -r /etc/default/zookeeper ] && . 
/etc/default/zookeeper if [ -z "$JMXDISABLE" ]; then JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=$JMXLOCALONLY" fi - exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ cluster['name'] }} \ + exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }} \ -- -cp $CLASSPATH $JAVA_OPTS -Dzookeeper.log.dir=${ZOO_LOG_DIR} \ -Dzookeeper.root.logger=${ZOO_LOG4J_PROP} $ZOOMAIN $ZOOCFG end script diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md index 1aaabcdf6b1..106672c9fdc 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/query_language/functions/string_functions.md @@ -112,7 +112,7 @@ SELECT repeat('abc', 10) ## format(pattern, s0, s1, ...) -Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` -- упрощенная версия шаблона в языке Python. Шаблон содержит "заменяющие поля", которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{` или `}}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). +Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` -- упрощенная версия шаблона в языке Python. Шаблон содержит "заменяющие поля", которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). 
```sql SELECT format('{1} {0} {1}', 'World', 'Hello') diff --git a/docs/tools/build.py b/docs/tools/build.py index 64c1b0e99c9..e395b56afb1 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from __future__ import unicode_literals @@ -86,6 +86,10 @@ def build_for_lang(lang, args): else: site_dir = os.path.join(args.docs_output_dir, lang) + plugins = ['macros', 'search'] + if args.htmlproofer: + plugins.append('htmlproofer') + cfg = config.load_config( config_file=config_path, site_name=site_names.get(lang, site_names['en']) % args.version_prefix, @@ -101,7 +105,7 @@ def build_for_lang(lang, args): edit_uri='edit/master/docs/%s' % lang, extra_css=['assets/stylesheets/custom.css?%s' % args.rev_short], markdown_extensions=[ - 'clickhouse', + 'mdx_clickhouse', 'admonition', 'attr_list', 'codehilite', @@ -113,7 +117,7 @@ def build_for_lang(lang, args): } } ], - plugins=[], + plugins=plugins, extra={ 'stable_releases': args.stable_releases, 'version_prefix': args.version_prefix, @@ -302,6 +306,7 @@ if __name__ == '__main__': arg_parser.add_argument('--skip-pdf', action='store_true') arg_parser.add_argument('--skip-website', action='store_true') arg_parser.add_argument('--minify', action='store_true') + arg_parser.add_argument('--htmlproofer', action='store_true') arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument('--livereload', type=int, default='0') arg_parser.add_argument('--verbose', action='store_true') @@ -311,8 +316,8 @@ if __name__ == '__main__': from github import choose_latest_releases, get_events args.stable_releases = choose_latest_releases() if args.enable_stable_releases else [] - args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).strip() - args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).strip() + args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip() + args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip() args.rev_url = 'https://github.com/ClickHouse/ClickHouse/commit/%s' % args.rev args.events = get_events(args) diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index a2d71b0ea58..bfe5ebbd1ae 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -7,15 +7,17 @@ import os import markdown.inlinepatterns import markdown.extensions import markdown.util +import macros.plugin import slugify as slugify_impl + class ClickHouseLinkMixin(object): - def handleMatch(self, m): + def handleMatch(self, m, data): single_page = (os.environ.get('SINGLE_PAGE') == '1') try: - el = super(ClickHouseLinkMixin, self).handleMatch(m) + el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) except IndexError: return @@ -23,21 +25,21 @@ class ClickHouseLinkMixin(object): href = el.get('href') or '' is_external = href.startswith('http:') or href.startswith('https:') if is_external: - if not href.startswith('https://clickhouse.yandex'): + if not href.startswith('https://clickhouse.tech'): el.set('rel', 'external nofollow') elif single_page: if '#' in href: el.set('href', '#' + href.split('#', 1)[1]) else: el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/')) - return el + return el, start, end -class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkPattern): +class ClickHouseAutolinkPattern(ClickHouseLinkMixin, 
markdown.inlinepatterns.AutolinkInlineProcessor): pass -class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkPattern): +class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor): pass @@ -59,8 +61,20 @@ class ClickHouseMarkdown(markdown.extensions.Extension): md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md) md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md) + def makeExtension(**kwargs): return ClickHouseMarkdown(**kwargs) + def slugify(value, separator): return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) + + +class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): + def on_config(self, config): + super(PatchedMacrosPlugin, self).on_config(config) + self.env.comment_start_string = '{##' + self.env.comment_end_string = '##}' + + +macros.plugin.MacrosPlugin = PatchedMacrosPlugin diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 37d39c01448..649a5c7881b 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -44,7 +44,7 @@ then if [[ ! -z "${CLOUDFLARE_TOKEN}" ]] then sleep 1m - git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>6) { url="https://'${BASE_DOMAIN}'/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE + git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>4) { url="https://'${BASE_DOMAIN}'/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE for FILENAME in $(ls PURGE*) do POST_DATA=$(cat "${FILENAME}" | sed -n -e 'H;${x;s/\n/,/g;s/^,//;p;}' | awk '{print "{\"files\":["$0"]}";}') diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index ed4a64a3c2a..6b0f39558a6 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,35 +1,36 @@ -alabaster==0.7.12 -Babel==2.8.0 backports-abc==0.5 +backports.functools-lru-cache==1.6.1 beautifulsoup4==4.8.2 certifi==2019.11.28 chardet==3.0.4 click==7.1.1 -CommonMark==0.9.1 cssmin==0.2.0 -docutils==0.16 -futures==3.1.1 +future==0.18.2 htmlmin==0.1.12 idna==2.9 -imagesize==1.2.0 Jinja2==2.11.1 jinja2-highlight==0.6.1 jsmin==2.2.2 livereload==2.6.1 -Markdown==2.6.11 +lunr==0.5.6 +Markdown==3.2.1 MarkupSafe==1.1.1 -mkdocs==1.0.4 +mkdocs==1.1 +mkdocs-htmlproofer-plugin==0.0.3 +mkdocs-macros-plugin==0.4.4 +nltk==3.4.5 +nose==1.3.7 +numpy==1.15.4 +protobuf==3.6.1 +Pygments==2.5.2 python-slugify==1.2.6 -pytz==2019.3 PyYAML==5.3 -recommonmark==0.4.0 +repackage==0.7.3 requests==2.23.0 singledispatch==3.4.0.3 six==1.14.0 -snowballstemmer==1.2.1 -Sphinx==1.6.5 -sphinxcontrib-websupport==1.0.1 -tornado==5.1 -typing==3.7.4.1 +soupsieve==1.9.5 +termcolor==1.1.0 +tornado==5.1.1 Unidecode==1.1.1 urllib3==1.25.8 diff --git a/docs/tools/website.py b/docs/tools/website.py index d7ba55745c8..82bd0d2510f 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -49,7 +49,7 @@ def build_website(args): for root, _, filenames in os.walk(args.output_dir): for filename in filenames: path = os.path.join(root, filename) - if not (filename.endswith('.html') or filename.endswith('.css')): + if not (filename.endswith('.html') or filename.endswith('.css') or filename.endswith('.js')): continue logging.info('Processing %s', path) with open(path, 'rb') as f: diff --git a/docs/zh/operations/tips.md b/docs/zh/operations/tips.md index 4780d3a0b8c..be39adf8d77 100644 --- 
a/docs/zh/operations/tips.md +++ b/docs/zh/operations/tips.md @@ -138,9 +138,9 @@ maxClientCnxns=2000 maxSessionTimeout=60000000 # the directory where the snapshot is stored. -dataDir=/opt/zookeeper/{{ cluster['name'] }}/data +dataDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '}}' }}/data # Place the dataLogDir to a separate physical disc for better performance -dataLogDir=/opt/zookeeper/{{ cluster['name'] }}/logs +dataLogDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '}}' }}/logs autopurge.snapRetainCount=10 autopurge.purgeInterval=1 @@ -173,7 +173,7 @@ snapCount=3000000 leaderServes=yes standaloneEnabled=false -dynamicConfigFile=/etc/zookeeper-{{ cluster['name'] }}/conf/zoo.cfg.dynamic +dynamicConfigFile=/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/zoo.cfg.dynamic ``` Java version: @@ -186,7 +186,7 @@ Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) JVM parameters: ```bash -NAME=zookeeper-{{ cluster['name'] }} +NAME=zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} ZOOCFGDIR=/etc/$NAME/conf # TODO this is really ugly @@ -205,8 +205,8 @@ JAVA=/usr/bin/java ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain" ZOO_LOG4J_PROP="INFO,ROLLINGFILE" JMXLOCALONLY=false -JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ - -Xmx{{ cluster.get('xmx','1G') }} \ +JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \ + -Xmx{{ '{{' }} cluster.get('xmx','1G') {{ '}}' }} \ -Xloggc:/var/log/$NAME/zookeeper-gc.log \ -XX:+UseGCLogFileRotation \ -XX:NumberOfGCLogFiles=16 \ @@ -227,7 +227,7 @@ JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ Salt init: ``` -description "zookeeper-{{ cluster['name'] }} centralized coordination service" +description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service" start on runlevel [2345] stop on runlevel [!2345] @@ -237,19 +237,19 @@ respawn limit nofile 8192 8192 pre-start script - [ -r "/etc/zookeeper-{{ cluster['name'] }}/conf/environment" ] || exit 0 - . /etc/zookeeper-{{ cluster['name'] }}/conf/environment + [ -r "/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/environment" ] || exit 0 + . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/environment [ -d $ZOO_LOG_DIR ] || mkdir -p $ZOO_LOG_DIR chown $USER:$GROUP $ZOO_LOG_DIR end script script - . /etc/zookeeper-{{ cluster['name'] }}/conf/environment + . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/environment [ -r /etc/default/zookeeper ] && . /etc/default/zookeeper if [ -z "$JMXDISABLE" ]; then JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=$JMXLOCALONLY" fi - exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ cluster['name'] }} \ + exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} \ -- -cp $CLASSPATH $JAVA_OPTS -Dzookeeper.log.dir=${ZOO_LOG_DIR} \ -Dzookeeper.root.logger=${ZOO_LOG4J_PROP} $ZOOMAIN $ZOOCFG end script diff --git a/docs/zh/query_language/functions/string_functions.md b/docs/zh/query_language/functions/string_functions.md index c2db80e8c51..9dba69bab40 100644 --- a/docs/zh/query_language/functions/string_functions.md +++ b/docs/zh/query_language/functions/string_functions.md @@ -95,7 +95,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') ## format(pattern, s0, s1, ...) 
-使用常量字符串`pattern`格式化其他参数。`pattern`字符串中包含由大括号`{}`包围的“替换字段”。 未被包含在大括号中的任何内容都被视为文本内容,它将原样保留在返回值中。 如果你需要在文本内容中包含一个大括号字符,它可以通过加倍来转义:`{{`和`}}`。 字段名称可以是数字(从零开始)或空(然后将它们视为连续数字) +使用常量字符串`pattern`格式化其他参数。`pattern`字符串中包含由大括号`{}`包围的“替换字段”。 未被包含在大括号中的任何内容都被视为文本内容,它将原样保留在返回值中。 如果你需要在文本内容中包含一个大括号字符,它可以通过加倍来转义:`{{ '{{' }}`和`{{ '{{' }} '}}' }}`。 字段名称可以是数字(从零开始)或空(然后将它们视为连续数字) ```sql SELECT format('{1} {0} {1}', 'World', 'Hello') diff --git a/website/css/base.css b/website/css/base.css index 3d8595bb0ef..e171339f171 100644 --- a/website/css/base.css +++ b/website/css/base.css @@ -56,19 +56,6 @@ a.btn-outline-yellow { text-decoration: none; } -.stealth-link:link, .stealth-link:visited { - color: #000; - text-decoration: none; -} - -.text-light .stealth-link:link, .text-light .stealth-link:visited { - color: #fff; -} - -.text-muted .stealth-link:link, .text-muted .stealth-link:visited { - color: #999; -} - .text-red { color: #ff3939; } @@ -89,7 +76,7 @@ a.btn-outline-yellow { color: #000; } -.navbar-light .navbar-nav .nav-link:hover, .stealth-link:hover, .stealth-link:active { +.navbar-light .navbar-nav .nav-link:hover { color: #f14600; } diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index a0a607dc0b2..695d0175685 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse \ No newline at end of file +ClickHouse diff --git a/website/images/index/hardware-efficient.jpg b/website/images/index/hardware-efficient.jpg deleted file mode 100644 index f6e75ed0ac92649b14511397ff0129e31e9241f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 111195 zcmb@t1z1#Fv@m>Th6X`GP+D42L^`D#L=b7|?ru;~0Rct2L68Q4mo60rVStgA7Ks6d z5QdQc&RDPaUhn|0#ybXvjJOK`3?Kjq0V;so+|twShPJjk z0G3}h2L89~jRK%102t!I==vA`zk*1utUWCO0HO_YTUdEoI)m^(0D$$t(#^vQ0B}IN z0cKw>Hw;vJkFJg!$WsOYctqyr9<~5L+yLSkeJpK3 zJ;a|t_^OMQy$k3+9tcZW+L>E{urdg9J3G5#V2}^su=qod?H{nYxy5gt=H|A);UB($ zl3>H~_MUD|=KkMr{{OrYnK~8V*)d^ z_Pm3!?Ym!A*76D<4EhfHXzil+(_h$UH!oRCTNv4GH>dAo1LVgduy#_y;F&;}*3;+K zPgyJ}FArS|9<&on)6Gj86GPBWEDHy7RZQN4_F(w|>VO;|4=@7W;LieZ1snkT{W#X>umrZ?45$H4;Jr5pTZ1ycTDuJ_fEfGV_rF@V1Z{N!e;C?-#oYkCQU_bbaC!gI z%SiHz6Gs6@9!D8R5$6gPHI@{XIF>qil>vX(utcy#e_8u4KlT##0QMK`5$w;{<7W2v zJbrQf(g$b&pTTy1#L60s;~zd_v4L6zv1G9%LH(d^EJ`eDfCuY3*wPIwX|QcU5G(i7 z8;tkABLAmXzkLAyjrvbM{Hyp-d@6hye2)JtNz6$s_}fBEOn=1VN7R1UYVkX+zbyE# z?f*-$2dqFl)qdLgn-lgJRs-vTy@7SZT4416Bdi_P0PBUdV&I?cD}L`q+uyC}eD7mN zuxIRl^ZfGH9K>MS#N>hJ)zYggSC4;a@v`>w0`nG-b9M9gu(!4IV!Q#){ML*rE|$F9 zjMuJSy9xl9bp*o$0BFkZ>jQ+c=U?2tX#kKkI6FIQ`4^`Sj?;gZ0f1obUmSZW0Gw+B zfDaLt-X1s(==t3m5>VVAk1z`RoDs z0zp7H@DPXwo&t$L8juaV0E&Qepc<$LT7Wk|CjbXN0wcgAm_3UC3it}_0Y?xB1Q&7+ zLJqkEVSungxFFXcA`mHv0z@6612KTyh1fz|Al{H5$bCpOBp#9m$%Pa{svwP!H;`_~ z0Aw682U&q^Lk^)ZC=rwbN(W_w@Xi-eC=4&0&4TI>kPRO^eNiErzXveFxhX+ZX#G zb`thW>{{&i;AmRJ-owGcp~PXw5e7%20geMsAkGt^EkV>xVV>axo{K7mrs9_2w&M=sF5-T}BgA9CyN0KTXNc#5cMmTKuLQ3RZx9cOcZ7c)pA}yW zUkl#~-yc5?KOesdAA!G!e?&k^z)m1ZpiAID5KfRnP(kpXV2S`uNJz*;C`x#f(4H`i zFop0H;RnJw!UG~QB2FSXB4Z+NqFACrqSr*@MCfzp&as`7IcIpz>s;));&UD6X3l*h zCMV`4Rw1?`4kb<}t|J~K-XOszVI{diVoDM~l0;HNf*@HtkAI%+yxe*7^C9Qc&o`bQ zJHJaxPRdWJMe0oYgtUaTn{=5BmyC@}fy|2R0oe<(4zhW27&$Y!9JwX=ee!(r_vDKd zI27y@DijVB(G=wr{S;p>kX^Wb;r0c;3+WeHFMOtiQL<60P&!e@QC3rqQXW##QQe@j zri!Ghpc zpb@9Bq=}-brkS9H(sI)3&<4`JpzWpIrK6`)qI085rR$(uqo<^op?9E9pl_pJVIXIa zVz6gOU}$GpWxT+6gVBjGh4CHZ*UPk*l`nf=&b{1!`G|>~Nss9sQzg?hGXb**vkh|s z^IPVxEDS6fEI}-#EEBBwtRk#-tjVlhtov;2Z2D}E*y`Dq*)OuIum`Z0vQKfG5SKkyCf5KrHn%9Z3wIv(7!MK84IUq! 

T-pICPwhPHeQM zu6&4yn;j>3njLwcGb{^G1Dc@Q-sBw}Z?du-{3rny{3o>EJlhG#f9Ha2Gn08yu&=d$ ze zdl)93_M7`O1wuFG;F(!f_W+u(z~UNGUYqxY?!pT&ZeGtP$pc-bRU{LXPA5x5T#X+pG zN?i6u_2h`E69m>}A)n=cLO?C?He}(r?=eXeC*sYL^uVn)p7^5}`g=}oc;AR1EBZ|4R* z6lo?YUpRqO4C__+R-Z9Of2!4XfYe*3D@O?K77;?D$&wQu-4KnXB`5}N!v3e0@rUv+9V073eumik9}UU#u=9;Q|<&$=6Jk_Rp=-Hy_g95H;|kH8@+Cbt|J|W})!Qfi+*uqmyYPZyEZ8vM@Hg7Bv|`XJ_Aw;SinPwrTpqvZ=W)y2c>^Pt{9K_576Nf6~-}LI6qbHOU<$e3@ZnuW!od?`N6Fz3&{5jP^ zi{tR=z(Wx}8dP|)reEBd!znRGJ#AwYIs;<8Ip6^}9j5wO(?5wAYZgV=df=EzvnoMB zlum?A!}Z((Nm`TH#S0ZDh(CA0`P1LsToIkFU>1E#OsmgmvZ{B+A>&M;rxfC#JW~fg zkN)%ms_{hJj0nINVIyQ5x6AR-pMo_yu4X>SeOCrVUW(oa%dbU^(GCcK4zN~#;-90L zLkav^OBpmr2uS}Tg-dnOd75MWOL`=hU6A2FP$;In{ttmF)G zFM^{i+`RS`Ibl(6%pV{CH3Vt|_rlMXRDsNUyX$08x#7^#LA8I}4aZm8Vn$E59+)5Vj#BLMOdcKbM+6FaXGMfGX@0Lnw+_Px|4lG~9M z@IzDXLB;4S?)JJGc}|avl(V67gOuUw3A5k)b`kbp5IEn3F@OQE_;pct&36HFu2J5< zLy$GUSTZ>>Nj}hn2^Y&BY4)PgCX>5n*yD0=#S4|HHC@Ua<8riTj_g+oQMXx7TMWf5 zdtizFQpPr8WW@kYFRSV=Eq}{61sbHUz3Dte1y@v>O-8v`6@Icu;cFPSuC3P`j zMh&x|4;SAQ_`IB$1>QY^|`mcvkJr!cWcycAS}hjLIEK{LsqL zME(H0Ew86L%D(y$Gu4&Xy0JH~`1+~s=|-cYoeTx~8xmN0Jz^Yl;39}7r|QSX-u>d6 zfWwm>VRZxxu$TEdBdpV0lOxE&!dG!4VuoD5!q40@q?0(z#IM8GBCoh@*F{nF{=5O($mgMV{FAmcseyZ$=e3Fe(=4THy90rF}gbrdQ+=Dc+5o^={cnck;&?(MxRoBX_A=HmhUdA(x&lxSMlk0^6D zN4qoUXQM|*DUI)gEH7XV+@88!WYh@>Jf$IuYbPw*9oB29E2e#I7Y|?eVO+jXigeg5 zJW+RR?x~{8`nn+em{Q-am~pgndqf?#1=RkhOC9R7XCOV_i~0+o zz%(uyp5%5(i#?}1{lI5#i!m)Yl2-0XQ6K0krMU`IYrMm?o3SY6E3B(bK z>MbcS)QHMhe2p>Z;jSuofl~#1e@Q43jg36MVBfKwy+T8`QWa}s^X84^ykhCrCCoYE z;-Npv%ICe6kl(+x_Rp(A=e%LzVGphuT+GTl+0rIJfC^@H&l(+Mbu>SNP_79LCRN%{ zC@#%9ik*3lsP|rJFS`vW0kcI1C^ngn@bg@Oj9+tD9D?GV{LoR#-XW=pss(+v4qfn$!_X#l~!UdyHyX`C25=rSE77+4)Jlv z{R&x9!nnMhST7}8d?+Oe8Z$(!Iutsq-L#|1!?)zD)vbdGVT8Vi#JoqU%fWh*AvLh< ze8c2i2F7-xB*iQXt8%GK5{8R)*_PHfhja9b&wW|?Zuv4owc6vlsAWNaCJahM`(QG2 zJ+}-%r~sSC4Wrfz(phrqoBeFlchMZ65}`SuMHWI!!E2a@0KQ9>ooE;;0-Vs`B_uL~ z0v1Ciw8(b8V8^Pm_lG6!So)gI+p3=YtEVL^SdKT1xVdinoS}ELI@VA%C&fCmQMk&S z=8#i$p*Tqo3o_G@pUOkIk&UI?cc?G&T0e2}Z|777YXYmTl3SnkGC2aGFY?F~6j-an2~a9_39^yYZOKTV{em zLh-k1N}pHiYE5un7tzNY30(k0#el6OqA?5jO{RQ+2w2*6WmS3A%gz9q^?7c){uTQ6fgY z_Dm>a{jyw1A?}K7*v3UGRiYm3dVt=z$Olvl&%CTO(EJT*#{NTkw{ZVy~`fZf6f- z6z$nc^0nM0j->AWv4^{xC{r>@sAyJ~5$&&W+Fz4-01#NDX$`%8+ps<~T(c)LysjyB z+&PoKCTcp1ipLdx@^LJG|NV&5386@TV+S+m5C9TVt^xAUAo0Z~W z#F@81mPqJ@ImZ13Jv(NdOH=$Q5-}tYu+gV*J)p#4a!po^n zEaeBhO6fHe!m==@CAHAB6=yKUR5@*ZM-^Z8-kS?KIo|SXrl&QGgnnu~@xZOS zGH*3FZBE$j<35*IZVuj>xj-dv5K+f7Y_dtM%D^=ye16Zg1xC4M2)vP`Zs1<~TvVd6 z;BzwnK(hY;=h&@}gDtf#WuOlqjQB<>o=|MBXm7+aGF+rlRJ3qy(gCg{VAt-&y5Khp z_wIyJ>i~B9BrpCKP|3HL+{ZUF$j(j4%agPbdwnHuYjKF6;r>Mv&IEQ85_W5p26$}t z31uTkBv&+gFC>vgvf^X5kvwt+L{c3#hJ3ML)}d9a8i=Z)t&U}gw(E;F8X#Zn!_b3b zii<3SH;RMXK`Ah4lG|8@o4-X(dOwd*!WTLrV61>!vkX6+c}TzMSW~L~_{nt|s^|Co z*+cFt6>+vKy#eDjNU>|a*$9{0nDDXxQq=K;n#lcmQsxHcb)P$^8aA1eb}ysUdi3X= zS%w1yFFfq7hpZ(}SvJg+?1dfb3^zLkHK~PlZG=JF=1W5YnFAfP-8x^NQCH_7BAg^t zwWOz*Gr!ld#Xb{?v=rzy2ij*}VT?UHV0ehQfuXn{|IIz1dP&riWcHhndHq-hJ=@xV zo8NC!o^FkPyv5wXh!8=x!F>2fvfz?x7aKA!m~*0ndh{)qrfFW5 zwj6~uZsuiV*@0IG(kIq1pO{#X{Y@t&YA)IG-CsbQYCDWj6xN`>`8AFxvpT``V@{)l zg17xYBwlfgKYr%`$L!?ZeenyBcgJ*yCU@K1uu?d<>JWgUH{1VkeG%r345n7?k6pja zy!{32on#SxH4}6hL*TKV(-}b*n22`&r+p*CAEa0vl6%H>uYP+cq0hqlT-Qc~s958v z1n~cy>4xbw*0Ik;{DK(Phl_&=Cl|rf!)D~ZP1y6K!(G*_Kn)DL4xgE zhW-m6_i5uPybFTTLyMpZ0_pJ|5s-%A9+;&-k?8!yT;QO7p-m=75kzoiZ}M!ZZ1fFu z37~coAYn!gy<#48yo%d9i+yTVYGC!SX}9^JMSNo-`FAb5?%iCZr@3T8(EXrV7*j(r zMx(&69S?2{MM=f44MDbY=hJ-lX#SRvV^wPQ>BNZ2RX*+>A;@bo4$>ycH>L=z!3YAR zh~1m>K^PJB=Yv4_dr(C{7dUqB)ZFhNK#Y26?xbT`OfmDkw-|&(s%F|;0DT# 
zg9isC(g8pOodDzn3DW=eCYk&1$N#G^=pb|K|4ClFlj#2@uU$R*|2p-5J1+h$^#4@9 z1pbfzn>>Z-|4Cl>^ZM(5Yv=!~5)?H1oBuy~fx-VV+)pbC{KWs2k01Tt^8d&rC_Vpg z8JmN)bpL<8J{A5K`NTgA|0D2owa*0jKk`H<#s5V<;e~ejvstS7gZ=+w>@A?`=(YFZ zgS)%CySHfZ;!v!(Te0Hq6u07(;>AnR9vs?Y1rF}sq6c@EenVfo_jm9Au8);9WRmCE z*^^{4b0*2&c?utf(V8z$871-BTl=d{Z0s%n=Ke!`Yngt5OkF66z&~MEjJ?!);d6s`=(TxoKH$2R-$v*V>|KP2vxf>V6pizj%z$EP9*8E581}b$E;`jNA;VA1e5M^!M}f!`Vaz00Fw zc@L%vNWaUW;|EM_$ll9`N&>G;Ku%*6|6?uxR!xqV%@Jn9T5I_?W!Y2#g!?Ft{`2Ma zjC%1Upxq~6D(Mhd2|oFFGyUeM;tx6!Fg0+QH~{w0@@{@8ApGXg*8T!r1??OH;9sD# z<9UD!Y?F=ti-k%%=tyFww7(Bp(;3?x5lM(x{cbMgpwN?KU}3@$7K z8|$~dEdQ*wXsU1|Ld)dmD9fMr9Ij3Phary~zN9wYwHE!(oS`|nsb3&Gpbo9#{$Qk` zVmqXHQL|s5J)o4kiP@bl&p$X22!3Y`A5$`#@=E z1qnO)WNoFpEZw!Hm6wkcYFU7i{64cw_74g_q+X29t_n}2e0EN5!8CM=tRt2_na}g} zX5t;!3QXgoKfy21!)KiASSQyj9^VNXZM^qj|jxz)zlw%er&=l!>!l1ef+R3z5iuWTA`ts6&@#!;_if}6qM0+HE1(ek z*L^HkgLhd{lDO2J+t#@`Xu{vj@Q7Jv)m%F&g91;#c3OrL-ITbhBUy0z6he`5W!dgG zh!^a*Ur}tczC~1OqZaQX0u{LTJs7>LDWNQOqD$#6( zm-#6Ir@%?BN+Nh#C79|`+gSDnL4uwjSHonWk%_>siG6D39n7V}WVaU?N(OBy1h}!p z@24{n6?ZYeGv%yHlO)<31bh;Hjc6D;JTDM23OcWmK{uDIlm%~DdG6YwMC0dp?Sdx} zk$ajnI1%%u-vjvgOTjb5?l(b)Pq>dzrfNp0X<-H(V1gtscKs&p2;&2X!LzW6r}Z1j z=;+c9&h~msz88#n>q#e9^ztuZ#KspH3gL=W!aG{60h^o1YQ$=M( z;EuL8aMrodS)hOWx|K0$*3L3{0pZj&CM7oX3 z{-p}|auWW-+p7o00%VE)-ZE{+#Xe6 zN1f31B*c3p7iPHy;sHl`K5LT0%l!tR0HrH{umWC$4aayGg)iqY(GS56Do+0e5*(vX zeibA$N*xA=NR60h%2)5)=-3!EGhho47C5IucQhXy0l)17xvi7hou4}%N(O=F>Im}+ zeC|v5r045u)1RI5>{^4=2F&~w9~OG&c^K;G00Phe!T50?+UzjF2Nx4b+ye5 zjtQU+Kkn1N6IJw8zP5PvYVX3ao92O|Yp(aaRdAc^k$_p5DNA-)Hvn3Hc_ra7Z3Y#x zYpgs|S|qodu2LRi`$H#=FOJ|{GCbMgM+T#}YR!p!of2S1I^Y+mDm$PB*O8&oFru^i zn*W3`pv2b>Q0kF!18W%dd&Hr4itnVZtFl3DPyC=*AB>TGs0Yk&%Wq)yeb8s}f z{s$u&L*-=(xsYZL;i;kpo(4F@k$%~4SM2?eSo(5uh8CS4PTKVp9?ZY|-r*3;PB4US zvaz<8!gGML?LMII+S9kIXs4>hIf9#d`pw;b8pr`U{3&079x*}%yr96ipft5N1b%1t zL0ZXPX6UDTs?mi=%(wzL+mSvZ{BQ7M_p0c;;mpW4k~0ceUp@aUffC#>@R zr~Y(to!2@*)4f=+5BJ*#C*%AKOa5Q_&~s*74W}s3=gs%&`pq@gDJj(cZ+u?L?7Mj@ z!AJZq8CM~$)qi<&|5phSybo_R6~F-Aq|%E1WX{&`UvvL6p2WNCZLjbs-vEW{=3FNu=wnWJ;`L;${--Es6=|G#AY_lhD%e&+Zg z#aCCe^Zs=5;J;)3O^EZx5`XNlE?(#TN#DPz`v2f2;mpB@&FZ>!{sN`_o1p(UStN0F zSeLKk_aOEEld`H2{oj#~c9C!%?IOX!!Xy5RT_hfC8cAvmZZnsVXKxE?zaFWN{WLxP zZ5IguNnk$&0uQYzEg2M#=%`etfpWq0%poD=G=^FM7VnekA?YH2Z? 
zb7lx7#{Q)9hbF;}Y@|1484zx42Na`S6$c<`kGUH#^$63*9XqjiZmPEvvVS2K;)CQ` zo-$>L_P1tjb~TIkZ>sV7q(E{VPHnP9=Qc+Ked>pPpCBlCco=bM{OC&i6aSc3b1?E7 z4M@lnz1|uT_cLX#I(rz>P%GH=+@9X`;y&TT^YTfI#V<=Mgtm+D3tlxKa1ZqwR6e}2 z9oFU1&x>z#<;)mIEyTWByvwgKowGR=Z6j5GuhttSyP{6fA)p^c(4fQ;bNOIim|&$i zlbI^c=a!ivXw)_Zj&hSm&*WN+t_Fz=Gj|W3V?7^lOk=7I%4r10A6gj>cM7v{ z?(T#|t7+x1F-NRgO^sCjTFn~HZ*=;lSD$HeZuA<`uHfnPMIugP9Nxu7hIK2=0ZX*I zT@o!zw8|q_nUxy7?G!CO{{khIGkZt-Nw!Pi^fK7UD+ghIZbMjDodTc7O*ZI z-B}wEKTtT!0Uk~c`KbC_o~E+(wYq9uIq`AE1z~vhOSSo?D>mMlk2w%_eu;g-YVz$k z%iZEiKe=ip7Ox?xdu#h~*7UZxsU7t3D)UW1#hssOiWyil%9ypTz z+S|DHcvZ(?%=5$)T)|!6ytw2@i|Pm$J1AapK%w%AF$@@7Qn-a4y0U}2eP52_j9PfE ziW@U48V%2RG&O5om}xCNeKWn4qSA=n!4T?bn{OPl?MBGk?M?;nFY=$_@fIqkjRgoz zdO}B{`%0;zgWYW^hWux-wC9l)ngkA9Z?#J5C}PCr9`8W&{xZSdo%EzB*ew+LhNW^$xV|TFJ8y-NWP# z;yM^z9dgi+uL*j4LESle0k+ay?lgJt@)X!F6(|F5zK~tL{F3o#Gwi_PysggU!(#u% z=~QHd0EYCZ{n{g?gbXRxWx5vK5cO^ImfiA*qMygxeLp#f*o~{7w|$iCsdft3Fq?9@ zmx(yKzE$#DUSGMr?!%?1-hRLiEH^PZHy4i%QafTKrP$ql_(=Tl{UN9#wX8suZU;wp}-c zvCtA#B~U{P$|r7B_de>@2m5$kUJtw_e=nxC@)E`AX(8|nzL|F4`_2~+1M~d+7f8=$ zsqzkRE*UK8s=JV=vk`dcq!ws>$P5!t?h+l$kwv{a)ZyL9eif!neqPsn656+>WSgO)dUTu zl-J;`pQ*){?IZmsp^(L-fPAq+l-3OpPgSNYn|nZFz+HaPvUbb)&+W4~R9)Dh@(4`Kz^;4Q@yRHsuG)YwLfX7=2lle{@l+>b3BM?(*poX# z?(SA!VAbnVh`^A7vsa>xH+c73Me(%NZFSL(S+)S?dE6wZPxdegYsB^!DDoGmdOom& zs`lJk@y0J;C7?Lb_ts?gjIQo;kGMt3jJ4j9$ijn~{1!>Wm7-a3w9iW|latgkTWwUsw>&Y`ju-kd;Wf?n)C-u^PPIfWusvu%{g8{J2Q8Hkrh z-oUNvkX1uzRyb**wk=ZIgJXPhu{N2tY~gfE8Ry!`cf9N@9(gjsZ;rNDZB9#GP>X=byt_MpMwj-I?gDj&xU zKFYW}%@JLL)IsbQNIAqD8k)wUe&vcWMl(K@<`!z_8g>qZZ60dy5%*l7hL!KVF=urf z!+>!r_%kO_v}v6ZdXEcc^|+T+H%raoXA!xB0H04>^9V#uRO+|#MDO>gRK>Et6sFL< zZy2KV)%PTe?Q1J$%X%w1TdmNH@Zl)~JtF^BoY##~hNK4ECA{yzb!Q=o*6QhXV7QCT z$ZgmmuKIh-faY^kfdnkdhrkRLszIW4@yVXt_4dWwW9ao}Vdk5X`y+p9gzu$Np9OKjIh1+Q@=(L~aqTm1$ylp8b6i7ih|Ej?`}I!+0* z;=VO{WshJPK3vd651-#qe%z>!ZN2N1a#?xQ8tbX>$j$9O6`fMwOrn(Y`>Yk`4DpXz zgxvR^@&{u3uu2ECs@N5?1>~R89jGjG3udJk*VL^oeu0;ceMM(h=V*|nO(7pELx9RT zJYHyTM5N)Yl!3 z2Ve4?^bcd^Od*6;<3#$#S&b(nwQ_PrtQo=GLKOb_RrZ*XCub=Fz z#kR&i@TUq%3sU)hAx3jx7(d65Q3m$xE;&21amnn^O?mbv2!u66+yf)(xq1_GLl+P4 zt020#%F4@}xo4X{hqhBssZob1iZ9dXou6E|w$sfUqSC@WTIhdd)VQO_rTz18q;vR1qD#n*{0~eBEcFxG@KTO^ME-j< zFU8M;VA(34uWZy*5@^K3P4coB1#rWr zI=-4?S4J+_s7>g4pW~JPp`VDj<#oDKUeXKmH@j@eFomwhgR04hzawm!nwLgF@yjwHvba(#gme2O7vA7kQoQ=0LZm-~w7 zcG((8o+Vc<3H5BYm2gOSEC~)YgGZ7@Bz%1A#BEmgvh@=1JfPWpXF2^TrDsbyNKbl? 
zF6Bn;>L?PbR0OATk`QwI!0%-}xcT+xVao-G4TRrmEhY%iIcAkC-gst3v*|-Sj(7O& znHzC*_DJi4+E#H!xoqbT4awNIfdRMFGu9WZG%96Sq)ri@-{^t|6f~y^zYXd zTWK7N-5LbvVy48s+H>-FwX{)N`EQpKPMOrUTZLKDqcD$!?3A3Whl;;MCEQFdR5~11 z?o*>O-oq}W87<`_)sAvWVxzC=ezsW{E{H4|sz0nON5@-I9rDTyb1n907r>`ICYB$r z3lOo(eea(MwWdxT3`Bgq7}Nc#?0Uh}DJWVLV#WM>DTgqaQrFBUh}sxem5^G>ZSYz$ z<%v<)ZeI%zl^4>a_yjbFgHMrLx@jEmb=!?x#<%yPCF}JDK8wbs>&vuhN| zbVg?4D-sEA?&TgYnhyy&rhBZy#I~@RF*&T1xx&+l4$ZC`2-AJaW5Ms$##y^jp_|-M z?a@kAT^U8)y`BzBT~U}L``?b%3*#;(_@zlzM$8F84)_mSMZ~&%CX0UAG9`~~WOw@s z`j#x_N8aHwqS|(y*<~i!&|sxtN98JqEY=O>7pQI#=^d3%z0)<(ZsJF$t?V1B#reG^ z!CQm#?D~7LhZ@rZi;1FmhegQd1~Vq(qD!#~O8cl5XG8kbM`}(e1<`0Qk%oi&JHuKo z4wP35G_8xjK;KCe6fEoXSXe%l82I94(x!Zi^)=KP`+1u)!Ydp&xFYAm+P+2z@BX#Q z(Sg)Pl>)tQ^k{S$fuO{8)yDSgc_FLs)3_T~6tH&awigyiD6^T5S0O)itzs_z}1QZb^h3`ep5|w; z;>7aq#L~?eVn&!j{URcB+*GNn&Ok9co4jd?1Y1-H*?5UBrAZLTs0p|NDerE-A^CI7 z$m4ig8c+t6zr7(G;c0&1HOB8EzictFV`3|6I<^1{>X{&plOlBqm1(C0F%B#u*GUA@&CnY}g5H=nj;ISh z`TNTbn(MixRJPkE*=~E?9ofJ;9Q}2^%jv|$4RjTaFykWrBs-d1eBM`L(MozDvQd1r z&@a$h?8$WBJ_@sc%`HSJf{G_(>Th^-Xu_bH?N<-+U;M$Gr> zq<3LPbe6>)>@qzpqfklGmrb_5y`a*t;WntM1?Pt7Fb%MJ*A=S7`ZV)nZ=3LI_t53I zQH{y#5cx@io34%?Djz8!MgMmZM%eyiWGvz5XnBDrVfPYVVn2=*?zatTJTXn|tj8OU z57|R7<<_837&`_$X5T#HQ_8#-x6cGcsvZn;C(dd2P7m=&nS9Q-maTW}&PTw(P1#n2 zXEw;Y6j9|>GKTX4gg;xW5EN6y!+Q(%-r%-WR+RggL_@4DvIoZAOYt* z#ggpegAPqk%^SRuZ#i7-f{kL`)>$uZ1k-|N&AwG`U~;bWiDncpqMONzjqK>>h%`S$ zjp@g)=!K63PPk6HgeWq0e9EJqy?r6ED69Edi%qglQ!1BNg2b@F`f-tTc*$ z7@C~e&9n9C5hhL}=RLs7ck1d#?00iP7#qtZ6E?Tv9~obu!DVdTMczDUY&VV_Q+)_z z;tAgr=8N{=D&>6Z9vU+?5$?NBd8lK zKkO{uIxRV?@r-ZB8C}u&K|NE2w&aa@pj{J9?w?d-!-pfo`>^d@k#wu`E|-0ksJ18< z6mK2(4!isC30Dp4K5b1G{s`$$87uypY3y*^iK&oX7&?V#`FYQo^1AV&qu$n(BWQC% z{E9K6YK(T_EY8spNH;$@_D0TK%*qV4a+FqbA%k-n6EAvE9LTcUVhzLX) z-ryL60+TP3rJIvbr5p0E@=vUUtXH0 z>6+C(+(LeVN=!doJ~^|-IhhSm>xZwkhjWQBME-WaHFK9~8srC9^XW8cQ)mQcMq6b~ zKdd#}lrDU2)^FV}ptw0EN%h|}ZfMwEB`bnwAHM;!{-_%l6iiP~&&bF~wKixly)0g& zTQxi*Y0?K9Aw!3Os+uHtDb!p6n>n1r-h;S1Z`G+owIhbA+-;yV(xBJe<{Kx61VQ2( zB70r#`eU#xh~rviz7o=sMY;H1eC&%?e6z1-XnH}t>`$Fm>Gu#wP8+oHGEeK)o4;zN zh0P9u`G{1Coo-Z#C1F?TQ~ztuEo{4aP9r$3l62?Mv(xfbga%2)NsPLM`gmk$w*Y0h z`VNdTfhOX45!jjziCGt%9+%W)&BM4}8D5mC#1tXN)ao=$Q}Zx$Db&!hrdA<-%4i^Y zZ~(hIbMb>>vaao6!jG-y?sD5J1Ke2E+n)ReFXPAC5SHkdGSIpBgTR!E_~PySGx-hJ7=eP=W?1l1l+Vy2#=BrmDmCn^(Oy($hWXr* znz_uW9md^@HvzO-`AdFy;{9SzZoFx~Ar{PAvlt-y_lP<8a*r|LM1)8?zU2-nYs`wO za2Na>2J{>{0!jAP;(C;4bM{uoGcj?J#nm3@3(a%e_}!;WnYl90XD1U9q8FdB+~(~` zsstW$sU{o*L}l&!?J7*~ke6P2jL@4zjMO zOLeu-yc0Dg>v12{PDz0G;wLZ6wbUbSF1zw7s+r@YUj5?$8w#OoMffs7X9$h77t* z1xcB$xkx!5s7b{^2s|ohjHJ|E${(b0Q^Yykxg*sx@no!Ei8u%#puXT;*Nwr^%5ld} z0!q>PXvj#y)@)?nAVLx+t2u^?5qJLr3JT0xQ`;~MY-^TogVa{s_%$lDI;B!u_8x2G zRq9O^d~@cJaCc!+*j1F^O^`XWH5{<>u6b0QDRgJ8qS(H-O}=<9S|8OMEt5#O04@TN z*5)gq?WiEK_)EeVlBiV!ovVGGP9hw;D0C!xs!TfU^|;)=mnX%=yg4lC`<$yrC@8Sm zIO66{Xln`F%4XrZs&QU?b|Fb9!TpTijROOLV_m(I4~h91-F=m-)o7=TNQc>7AHDuf z-VYSat%RKFXTId+AZKG2f7KQ(Ql2D?m{0zt0I+scx(Q5J)DHvG8(VoLU-xKdV^0a( zMwze=Lqx5u$ge)yP$-+@7P}kS>HAvoi*6gAoNt|6vne~qnxJe*2oLxjuJHGLzlkt;FY~f#G{-R zR?860S|4Hb@vXTwgM7IPF`W+mTusbrCFuBbQ&*l6uB_;F{7%X9k80>9=1sdk_QNFCB2B0&QBISC{Rt zl&@0BYMtdj9_7Jod;J12pd{iyjq@ErIikRL9kf1p47++{d!CE;Q>$#)w$7c2@fZW#HyI!yC3PChaD z;GJ(2UNcTd1i-2*K5JSXU(`f{$(^pM2?~^Ov5~t~Omi2WA307RMR_?9XtB@$J8C{AkSkf(4_ng?v zNF!C=9{V3T#%nK0R3dz4O??&utGzc^v>N|RT74U(NrDJ-4;foRSJ^cm$YE}XJ{El) z2(-!8-yy==Vj=hG+Mj{sM=lVELuyAUXbQ5KA|8P@05-zgCc+yyWeWBFgS~mc6hRUF zBNqh-;m<-aU_~S(&WeEpCQ+0>+kquL}0jJob)q4rO#%r z!usD~^hAv@F;BiSA#rmV{s^jDrbLueI4G zt1$!rL;blCyJDlb0FAmZ=hM#K7<~@#*ODKz54CR2xEs6OgH$B31{qxP5+t5`wV%#` 
z7q4^E=J;*x21UE8w%0pu$DT|$69@7;Q_kre+udCwJ!{Zr-=|mk;HXN~O0tIv*hzU= z-|EiuS8~zHa`qF=-BGQy(MJ?xogicQBlB z(`{Dy%*mwya|z@eh17s6oBi>gz`O(F&#@T}SAlMbq30XJHv z@P&@9>;_}Wz@~^l6H6EdO7>424P;GC;xKCp2o^UdwZGz%To#}&1>rc+zFoEW(#=`C zdr$3v#qAW;1b7I$Vdmjh?0=DmGGN*zWqrlnL?i7Tg=xwyhBu1+^aG`4^gS{m#SxpP z)%gdQd@0JLNhDY1E6(5ge6Wv1M;I*|%ueP>(yXVRvQieuYh~achkP3Axo;NEK z+bS>Dwb0z2p~ZHKX^vI0iWrm);CU?jKQS!&xZq;Iy4oDpt)tOh#McfJDHUO{P!xq0 zPh?GNGm3*q>)TvY(8kZ`tnJkljQ?sM9I3*b$j%KG&9Ny|0xM=r8|}$z9)-MS8S{Rwx##(RTwT>*-PyhA(fQ$z8QOL!$=HJQ<`T~M5Q9s;$mF=D8uy84(9 zy*sN4PxG8}gsu(*kNiz695#*e_!nuo@6Jx$0Zmo+P$!`@eSdrA! zY&w}G%eF`uU2#%xdlL#&W0HXko&HDnpV}Kd@crJYwb-+e8P2e z3#vyIUO|grg@TwlQ1f0z6T@ZTp2S$Uf-W^P1LBY-g@J(1uvyZo{EPPa@C@4Vah7k)wyhs4 zH@bjl=Cb-naE7i~yC2^v^#&EXn(MyM@m(%tnOdL?P8uk=_8@TlGT>KP&7%AWn!;vQqM%$9-{KarGBWEVx3%5GLE?E{kMq4e zz~M?9b;rP%8od_9)2!7+N~~qm$phHHp{~nsYqrxnC?tHCY6Z9DT<8Uv`u5)X+f=xH|`E=ODb4=V(BY9p+z}-i8DEm zymsOzhlB=w)2igty`IHDrIT)(BHR1ybt?KH$NdZBCxZ!`o|k>iZIJ-h{X>fv0tFKEH}tHjuKOA$`9j* zmVu_TE8Xo%o@);(y?HkCpn2z#wVF)1`*ZE8S7DriNt0KICSDSu42Z+W&hCxkMjbDj zMX|((sYaB9=;#-pM!DgeTgVpIaJ=v6*zK{b2oL@AgOU+cmTJ!};JF-yEVuZUcd`aj zN*Uk7qk=9Yh!Ux2=$V0!tvU9)Q-i?KI^>I`#HMYEe2+aW+Ix~-^<<>fX>!m-n*X-X_Ig;^nn`9T zLU`|xZ90KGcjPK!ief)6BPjhN)0l^x2Au=%9g3`FZ^&AW`Ur~+U%Y$N>m3CGE6*lR z+2Ks7@0?aEis6kGDnAB{Con(46DS1wF%|qC5<#_@qsAvo@?11iu3L+pp?pMy?I}xl zygxP5>Vkq;4vaR68RDpz$^9Ef_OUZ+1vfjtxt)dv;rK)HqA+Aq=MEkOqDm^73?ikT zYP>g8KA8*F;7y;68L;+(qP@A^+B(nRJey@a>Mx6g!4cQO_w8F;g)P)5OVtXTl47nX zK-@fq({byT9FS%5g~Py=HPY27NFY-=Ooi)X3*EHlkZ&xU65%t=bf2ls(ns@3=MQsy zv69fX1Uw`^`r}w>!RU)DQZJ3j?p|{JNc{z(=jb?a$c3!>_lj>xWVKxeZZ-ez4P)Bf z1jL-%lb6p*$i148)^CEDKP;(~#YJe7YHpAKd z;1$CQg1U{E}HsM_(1nq)@IG3#BF3ClR7AYH{i{2L;Q${qhMo^6Wh}M`01N_S@Q86*CYUK?DhgHxxE=py`Y>3RI*%I|V-J7J$W9#&D$K(LcpDR@gx4<|*kn#4L`m z%FIS|7%J5s#9=p!B2g=X-!CeW`&hQ~cn7wC2WN;>Hyr>Y>~p#SYWXck z+FJA8F&oiQ(8znC%>*@98U+3aMm49rxzBa!W zJXbU_DMlM}6P$z+2s!WR-hF;CP5jfr(SQ!_zE>(w+NaiEl8Rn7rQZQC$h zNIkGv$@N@4@^jbqpBF;TuM)?>&z9_@&jBH0pNM|Z=HZ+mz~>*cx@3aaJP2Zom`IDQBXMzsTHte6UJT$b5(P%>bF}Q230sHL>%H_@@w)P2j3d zqA$L7?fuMix+Ar@NJXe2GN8OK`2~7;UFXdxVjh9me;O{eS>qD^;1TTFcY@=|bc3nZ zou6p7HP^*&;R&SQEY>T5_Ny%7;CoeupA(ve2(?Sw z>SnRixhJ$rZD%kL9~{nokA@b_K;%x z!Ew=#8yR*sJ3HXJl*zc>FlL3r3YYRo7KHL z%qtz_&8gnYZL|A-n-$rw29|c!-6-A5Cq8Gk6%<~zJllYZx z4U<-;X#XB_>-qCqW}c;;PkNbFoBk`IXApgjpdxo@`*osP8+Dc=_PjT+J6+vV{DR7w z&c^1L&%^z;zTDt-FmpCdIz`;uvCK)k=49f~)NKckm*JHO#|}K~@k9OA8uHS1FdJ+Q zPxv?ridf%EW($+zjLoJ$-9~fdr0kF4oNk>nS((?XG@5C>m&JTH1{EI5g7s6gt2Gm^TBavx>O)32K(HzTCbg z@I34~UE89_lj!a%>H3~gb@>bQfuo)UA;pJPMAVN@NLQ^@7?FcrTCnd%M(Z2gi1d5` z@6N7tEwpC)SqWW)k~L&;o8$r~_7I-m-00`m>{^V10t#~F^E6`Geazs51YyL+2u73~ zS6|n&Tg*6(;kMmHJexX4N?}Q+9Ld;hB$tZH%^SrPJwNFg9JlV4?XpGRb zFFIQiEU)){p5CavMb@xvTl*ZL$^8kShNL+v8W@#b+XWhzJabb2-fUfiG zd@G@pCum#`n;ZR+Qj{76cqs8L%3l@a@GLNhqduMTL@jWSPIYUvb(K4_yq@HD zcL7%8=*8@grA%ZLtY@B?Lp_M~xi3W|_Vd3ov~UGgX|RGB)eTJio|BtgHAPu5jg1%U zkuNxnTncSDJ5l3nsIB4}cQ0~3?c#|{8i70%H`Tqq1vc7I=6Tx|R%}88krp#>H9N7+ zAG<*v?JE3=0r_5g7SsSUjK{X=u)M*GQsB!3l`J~O9R%-nilg2OeH_gj6d`@wSt_#%}@Db9A|6)@kJ{Q>I$g_PJ zKI>QnUTA{+XzLBMImbg|xl>*;65q=XH0!AqlJ}}uTNFkgad02w;g#2Ddsh)=U2-f< zoRe&J5ot7sYK@7^ZiNCdC1Mb8y&c%c%)s-sroeAu=5NWMO#AFkHmGG$%2djwI?!Tk zq{QOU%C}Qny+hMv(Rm+Re0Zn)n#&x8&5sZDwiK%#f`{&dW9RT>YcZ?E+VnZ%0Vu*6 zf0-NFq-vEYE!zWLuG9loj*XX3aU#6JVqu1nM0t^wxgy07p$pHFOhZzS5JUTRnSuzO z@55`9FEmLZMqCDsg%#vyn`ax_q)sjY_~5Cm*p;&{T6^ch5vBgNZsAGBrcE9n1(oEh zv5wI&LQNMg^D?evcANXX$?x(GXr!g->M7$s42Pt(J;6pGSS9tNv{|w3lATP`Esm^8 z@2+1Z7HDgHG4|S7DJ$*8A@^LgYs3!q7r|ny*_;d*7B&`#$!hiz&+njx0;}!UJLRLF zq6~;>azm}pHg=YI*fg}0cI?#I?VEmqEwB9tg3b(HoF-kpwL3;|E&jfuN01-TEP{kYzx>FJr}=~ 
zFUw#Vbut*3vTQd3Srz47u39{dh(ra!RDma-qRYf*MNOhJOy;)6t)kj zvOHLiAy%-2Vm6mm%hBanI)Ce55SkySHeY4ZcvG_aM&&3aNXV}zBF#~T%!y>WiUeBc z53-UrH~_J-=4 zE2dD;jpCIqTe}GiY|d@~7aow{}mS(O6IjOv>+W~as06p<{)9h}rgmXJy zX05@^+WlP0i$A=OcCCltRD3;*tXA(lvEcagZ zq%2XR63VAV@*}NDTfj=Ns-><(y}L~G<(lx|_r3gpfu!|$MMVGhi=Ycex6!-sFW7Ht z2|DVk3J7Hc`Eqe)x1)o>4!!8;22`rW(n@6=A)}eP&y1oDj>u3vTJgph!yCW_zHT#e z=8KXLy?6ub?T`sa`Nb1asiy4EvKORcx@=yzGC z;HJpHHwOv(SMWN|=JExF;RluBk+_{nUAEB;dK2@N%2IHgLcj6Bc#gqqkL26GC|0zR ziUz(Ikd2{h#B-Kd1U*aZ7tv+z_F)=Ct6D2BntsYn$C97v11p3iSmsAvbhT&&*e+y>b(+mwJJpjR_S8gIAO75L zyariHNULb+NFbgHEbi$?!g}19df7}wrJ8zF5YFEWIgiF=)9Svqz6M2-Ix2zESb@ZQL!JP?m| zr&ixV_1;s@hjd0OdoScIDYwb)_N@9>l*I5v^hoq{>$!G7HEiap2-Q-Kvqd%XsG7{U zvFd)=Ai`#)kh7uiDCLP5zm7Dildv^9VrY*dfb$Dr&43jX?b9qC|5(PJSKm8GW#nv* z;7c!ze0ECl++8^4z7k926=eweurOt=MLA}QWb^IIPT5yTT!m6Dqr1Ko786|;zVjWh zjh)XUY$z~8sGdW;)aG{HT@~Ub#0`w?mKzDNQ_ZcZd@a7C8JAX!R*$HbEP-^lV^GbT z44k=a=QzdDjGym6nZZb$XB_YR3Y_!%>gj8>WK=1p-j~FDhaCwOUQ&4y@xiykT=SH` zAm|ls9ZdZXRWPX=NjF^qWo4*+X3Wa`_m3e*cvTT`{Cz6!Kctoq_upNFPz<;33eU=M z$3cj@bR~xcC#V{M>1*CT{h5UNe0MX7m#^fd23)>YDtO*6zPME0x8P(m`Z1Yc+)n~w)Gf3oHje14n58( zjW2CHv_8)4VZ(qP_v9ggU;%4OkAE;YAQ~P?4Qyab9`)mvys;zhXQn@o)qiiw1Hcm4 zfH{c<4^xd7HjF_dspMFnA~TItqt|-9v0q4gF9k8+XmuQj3kEy-- zVul(0;2CPgWV;+9RI0@?RDdJ{r)^s91qDpHiO5J{Cltdz%a=RN-5Kc4;D#3=9UBHS|&62UuM z?S+ppEEuAj+7w?MXf~KeiuYYm?f4C>4ywxAR=MCLmA>1kcc3(P7ORbvL0>tl^5+zh z)v@vKCL>)8+jA2;FXdC7)+%+80j3P*5KRgjdh&j-i)cXpKc>Dqs12^^H#h``;4Z=4 zp}4zy(IUm&DHL}N9^6B5FIK#`yA~*Jr8uSVT%Pyd_xtW2nM`KRoXwowv-{gUD~oxD zX&HKkhuSjEYaOl>HY)wDh~VXf2Z-ha0+?W_@ZzjUd7q{ zr}CxvPWl=9%jXR$-RRXI?a!8Xs)gKliZ4e?5|?kQeu-D*9*9>JzJ2}ySO*zATi(g! z2qS4d%77alHQAVwRMCm5uodSr<8_Bp+Hu9JU)SD9=$sZld|VO^vN}z913^m?zmmVa zc|1WnulsKVPwo_h-gf^r^X2nu;oJZE_J6n5p&AX+Hg~^M?d5*kA^f!P{%DEo)4vNf z{#~d?ZrzDL0do3wZi&2v2Av>)+_7w@;Qa)P8&-P0y?MXS5HC4u5rKG=eaZUqNn<` zzbiYhjyIqADu3P8|MlZUb?4%JwSdd=|IC|9T8XqXGmc?ZsXtkX9owjNn~lU;pon%xHaC$1g-lM z-zU|`aQD_9BBCgO5Q%oHeU(+qidRhv(nJ01$3AK1+%e%`CUaEe(BzA*fLW`Y*7u?` z-~r=sVg|@l{M$56)Y;+NV~e-DgOcrRbH#aJ<^+o4h%Kn)bljj}HP?`NuJDxKPVR({ z02i3ypb3G$hv;VD3Mb|zUy6^k0&FVgwA%`{OB)>|`Uzs#ElW(-i}t$fG$Db*OP7(FN%WJ0gb|O=`r7M_WIIVSg+BB!w3*ur_ z@)amWNa5K$8Pa6Gn?|d+5FzMAX;@ZEB^@6rx~K>sCHS*JwchKKzZ~vuUz_5Qvc8^d zFkD+uVL0P{O29o7p258tF!Nfbm4>MwI_0cm0eKgDzAyd`Tb5@6;cl#Y?4?E!csi5z z9}1sjUYwK9yV7X@B*tVkmG-XZPG(6m${}2Sv$UFAE30c`cDSLmmJ+us-zOtx(w0iI zjam>MkwU!Mto%i9z3ekp;0V!uEPPj<=|N*;rQ9d7@Zi*i0cNHNOqDvR0J=3)ABkq3 zMbrkK{6zot8YD1!Ui}V^R9Xx7-FD_MSp=8A!1X_XdGnH3rFK;xQ@0 zg3DH9eNd-dbV!M(-?KDzH1sA8R(6 zwt9|&ZvG2+a3Ytm?$)hWh9Dxu&KU)m77m+n@@u@j9@Dtsjp8vM0Dy`h^Oy>Uhs%~x z#e&Gz4Xv({pA$*>)nqBrz&KM(DAq-8vX$s5x1s>@GRuWCFuP2dA0bOf&6>zcvql&jfdFR(Xm#&aOI9F z?(e6ix`rBwdMU{wIF=J*UY*Gqfx#YBtEu9LzeQR{mUwvS(-Y-Sg#Lw3^U@ak40K(` z{D`V3wONIgHxW$xvpDS( z@yS636(cv9QXYqNElTt8$il@d-#*{acd;+1w|Xd7lY)*7f4z`-vpES{&K^e9TG_Nw z#xF+y`BwVdJQ>=$Ob~qsBXXN{InKH0k}e@X&Fb>yyv0#eBF${_HQ0aRqkRJ>2icN& z7ck1m?Iqqu6)Uq^M}ulmJ;Z(iD=c#g{jg~5M(-bhsrl_9!&1oenU6P6EbBz(jvpEj z`B5+!t87Fug4S7PcUMKMM{S;II>-p8`9(wPv3_LA_zXDsr-dEHg?TGyA{N#_99~>m zhy<_CI(s}7$UkfkMag@GCaz7N@h%C>ef^sB#ZNo%DfdAK9m<4-z3$5blKY*Q| z;&SV$xLaMS%KqJ$%-=W>fWasMSfPMEO5Fd*htc#9UE>2b%Y+%d)Akw)i|oE~O?3~? zIh<^p+J57BPPEX+F|#=F8)O-G-o6=paip^NaQy88Svu80B~Jo->hrh8LW{wyzi<8{ z#2Igg2!v3{PR*hijH)i{>48J@Ok|BDv6m{D~guspK>{SSDR(wg=PeIra2~oE7D`ME6 z)PDdDss&!UK&fb}p_v%WyL}xL@o1Le@fG|aWkiU?)Tso)<5RV%rQv5WbWJCR#Xf)A z9zXINeb9A>&mWy#&z-m98i}&CCWm$@zB70!sEkn>ZWWNsq#F8cnkU)+p0? 
z#HO6e!~N;^k=AN_p8xeQ5n);DNkxqk^1urJ`vHIz%=(I+XfgR&K4+Tt{@GvqV#e^K9UblmR>kV22M}*> z$jpjLw?Izqu7mOD4058-!{*^3eU^qDDU}l8UGB;J(e+6Z5%+5zRmS3W#hQcj-{M%B z+v0XmMMBMgM8W}KT}|Lj#jsYWlJ!G=WMUo_Dhh-=>ANvXDJt@w(cU8!LNb<~2*RdK z;rCCc{Ky{2pA-m4r+E!li*^uW>+dMp6G-b8cQ014af<37B1mHO@;O|{$9xPI>h6;% zH;*IADvXonOg#wkPdO|dV1ZB^MlR}xGms(eZ3dA6;q|XtSs?va+{FRv=zOL0T=Ok2 zP^q>tTWC@1ls5S_8>_V|pMBmQr)zr#$L3v9uG_I9hIeT|1$Bq&TEsB-=ky=aPUP|K zy1P(eE-c06Oz*;K{3QyeFGh5YV$+rQd~$1p%{{z!QAZ^Gn2P8kg=5cM1iAm}mnS>9 z0)4wLdKo(*dLh+74gC;6$OuY2p%Sl;^ACpt;H9W|*@F0|QX(Zq5oXlpFLu)}vbt8X zf84yH0}v7EUdpX92Ovz|216;UO#n|%$bsl zpYKDwBu5{=0G*kG)}AKb__3x7F~LPRfXr_{^t%K^uNZ!WSQoH%SB>%Lv5JR8COu62 zeVG!c{3n$ye|3=*#2btRF)!DcHV|AyTD0OQ5)m`%bF> zF0}LEK=r=1v@`~Y!7q{g;KM5LuO`3NBzq@wn2M)=HdOJl=!YlE)n4|nPKSxFg)n-}_Yd8ObxGyAMBRi<95t3w)6TA3t>xk~=_ z7YaKAh~4$RoCrJayAu)5T%BF{rzG9NK7o%W?mC@po!@%PgIoQ#x}`1Qb?9X*U7_o@S!RyCdBT={Zx8O9e<%q$SN-9eo=gv zaO#9Jo(A@E__>D7Rj+lvllK!bqPdosNUDjtX?*BR+9JnUtlIxiyl>L!O&1(n{6LRmTeTU)(O&*jze|i)=8##i|e8 zZYeU-!b;9qFQI<`C!O~oF_tm030rOJwjO5;7atH;vnVg#Pv6=m>vhYPr3Jq!jc5+~ zQ$7UvqhHgZxM=t~eLs_Q+f!{;6mH?R2oFyb3$zUiLxI{FWp7>>u-DQQ<#DyZYtuva zWvNS3dSrTFYt(<2_jcF?eM!|at!}Y))dR2%_pQ1$?;?T>Sh2aq0J`;ypo1Z z5qVp8%U`)qu4>v+=}!;2BDIPxmcNRObyY|TgI|kb56f*W&N5xk&p*HSs>HJd=L$K@ z*G2wC5o6+od%J*=-)g;Ke|PLIV>xNPtBCj9?wa-xkPdt9tF+#kiC3(~dXrR*n8%joe$KM;Wn_g=Q* zrvBCSf4vy>E(3H5fEinu>*MGVCmKU^o_bjKi@ghWBQOuDYf6W|zLWVYnibSUD zX!@Z$JzT~Hq05O`oj@TBPTu_q>AO^7^5~+tL5qH|dI$J}GKqp$-8Fop(i963H#c?@ zQS#+Q(cgwl%LYk)Zx-(0JI#;>7K?0V=!8Vnwiyb!N2!WslI0XCV081Q1za|{s2@=R_&WO3o_N4t_`mxvszF>-ADDZqYn#!LxVAlJ}nH*X6nBs z)3F`#g~PJ*2lZziZpFXKK@5-cAz`L+)N%1}TrGJd1gZ3WA95KEz&l!%QvA3CUBMTi zgU~``3pl1Sbw?dwsq`96BB_ZsIFd)c#O(XIR+n%7E$X#$0C|RBoBD-!`2&t5g*BfN zp4xhv_Bz3c;AyjO1W94(xud3gu;c2&0NQ9Ov3vw-)J6O_gSzhGIzU3Jc|77jI5q;? z{jYgfqfzU>s52>Q1%sk$O2`kGXu!7fb5L=3Dx}BwnZ%gE8m@%ey_ZEVH;WEpRSC`c zH2^tU$N?LF1@3X7my07VGw+iGY74N_{Pn#qSL}iv+i!$hWdo~6nSvEStkJqoQqw6> zmQ2F_r%&?^Emf^T0rwS|Y4_3R(X|X@={#*Dlr45xC0bVdOUEsuBz{EhfY!Izf#v}H z{%`jY4vaMBLf4~X(WjjWt)^S#LsQ#Z_%YK)cC#Q2WG78kN`y%EBMmLZyjTO)@9QwL z$Z~;LVXX=a58nDeO5=q-SfjP}#A#{fjm_c3<}j1{y11gQrMEE$*M&;)j0{V*k8*e$ z5N#998!nsG97BGm`QE`**#g}kXq(WY1RK8SzK6} zC0|qBg^jWS3B@t4q*eHC*@_wwy37q;$tlT6MQZa%Kru#o^#&bMg@|lzK@`c)$ zy{jv_$7e+N-WVbHnb{xInZt+Xy4|a|Gn-nnb!-D0AVv;5T!mG7rvX8^gh!iWZU{~yyraY~%&Q6`>AsHcDOF17Tk0v@JT=95nhXU@*#O~ zI$zxFI&h{ZKdWZ5$pnATKWTHPuN7=ybJVhM#(bJpKKcB=Iz$%-b+?rgHPnbgve18P z?`K#jE0QtCg}3?8ip%2s`vb|Qij`s-d!IkXwiAfO)K5!a+e%;8vr6gNBBlf&wms^l zpvV%?B%9Vqi3DI@4#74xplmY(<)$SJT&m76I**<}|q-9h91)M#e z+!m$PflpBg8X9Ba_c$jP+H;%31y5h(!5r%{o2DuVdTXbL=gv_=F4=;IxTQWo*!56X z^7SA++o>n8rjpaP1-y1hPVPPtwr7SuI&w3LPyLq1kf$?)9EUkPhd&;5XdfK8B+cFm zO}t@7I<;9l*RFSfM9C|DF;zylDgH3?O*E=xa^B;XXBMyg8*VwB@p!o(ntva-B(d1q z0zl-}=@53>-gZV#i!=Ye2blWV&x^0(n6%1|1e_l#m!N8!Bd1GP-k-W825jj~AW|;XD_184 zPG1YY6n66a9HFWGR^$um$&p2qxU6KuZQ%b0pf*~RIgGk7c>?>Tf2eavfJ?fo4_^@< z=Of1NXw@hrLh46^#H1VD<6(pF(e;11=_R~4eOKi#?1$L)&hbFK#XRB7f6TNWYJ>7{ zwDWw&QIcX%cvSij&y(s^ED@#W-IZ@CJWrwWIW*&h&rk+K8Zk1m%tx8qytyd`7Yh&H z&|(10DBTfV$(znJ<-vlPGG*rTMfukSDP8c2Wig zAd&Sxt`?&HXvYgfE+$*%^!h{n^pCJnkV=|G!?iIbmmW{RXo~&ESZ4tMhiwA7U zT=mm&80s#iu$yEV&5gt~kS*mr_^~qhtEI^_<~%!s3OA<+6oxR`W_*$|m2)07OlQh0 z$bkIOv(z||>#mX768@W4=n*p~{J9@uE6Eh%gNUwMXN6gPq;lky6GI}szmP0_G5iK& zhAOAPvnG#L?w~`zCD2DHx&|!74_TGsM8FYF{|i(K-5gl3&DXsG9EGT8&XHuktkYW- zUCoHO8(kZ!cOt1sR|fhMh%Gj#lGEg6QwUl@@C?fUnHM-C6Us(rnFMS-yp_ z9&l)6baG{LUvWf`)H)|px&8rQU!Lnb{~Kc1D<*Iom=pk}p!gsb+#ZD#DEagtH7i#s zzm4+VGd>Y_RKamOplw@2-Dv1r87t-6%78z22&ziXmtDj{<@Z}+T|E7lLFtVgyC<3) z^HKL`$VbMV%<8&_<2S+t@Y@T~4Go9<-jU2xlOq6Wf->Sy;oIpMT%$=&zd;`QnTGGB 
z=(|roAtT~GHy&spKn4ObP9qfudxpu^&TCl_EzD$&7YebBO3NkL*yZ=9^i~{__Lo+c z$XQI}<(u9buv47Wl?RRneir&5V?P}8=a;rzvoPPT+KmZ3JI%N)*>=qs$#BekEDUz0 z5f-B!GgQe}Is4WZs^9|+TgK@pF;^y6&=Y3XOy*l>i~+)-pV;135Wc&8IocU98(Q1N z5=%i84sVsbv(+azsh%A$mm_P&>CvOANw>vCwg)KDH^z;;20qLj$;$Q#BNykRUVy>Y zu7f=_U)8iIqIHEL{8zX4*@5$M{m<1{*(hVu^vPHMgIGxs&}=N2WR4>6C``WTy^Y~q z;_>J4*FuBq(ZKJizEiHLsTFnGdrwoh$Z%}RmRF^I9y<2HFlE%ZW3bQ5-R0>>QZ-iy zH{<+BdGM#JY^-%D!Sg$&Xk_VmqE8T_xcFDJL^n3IOY1thwdloj87oM4@w|LP@RXG0 z8<={C$Zg@VVkYS9xcH8hMrp@a$_%2Jw%rcaEojJ6P^DBtVp{5=cKrvSA2Ko)(o5E_ zy0A0kFD*hpXG6P=wRe9;0HUU$(xNQKruPUD31SjJJR7bQ{BtsbrnGt4rAr!X&vxse z8&2Yz9M<8?UeUZ*@6mFD6~ucBd*&8jxpnen8jK%$EN(`4 zc)chLWjm#$ISc}imOT}nD2Q72_j=3O>Qv||4j*j1cvasaSN>363?*=>Gmdrr`#~iP z03lw69$vWDjOWC|EbEn@yCqVaAgv@=s&q^H+ANuC`llU{T-_x8*dLWujkOM2M=h5x zB*OpevR%#EM)VDm1dzHcx{AB}V4bSEwl-DG_ZH3oo*>YG3tsQCP!dxqz1Ge%5Ih5l zg7G*>W|Nke;PykZI&vXB_73Tz29|+5v#*G1n3kJhK1bFhJ54^Y-Ei>qxMlq<98b&k zHVK*3u1VH*2wJI4lSgiQdHZC1ib$V^h;Iz^tv|e>Q$E9o9+b8Ij`1L5j)M~iQql0^ zNIT>!%4&-{f=Mn?j*VMDoecuu*S(7Tbf@&HI3#W5J&yN?6gxr*d6MdOn`RpoQq%^P zoP~d3EEA(bQ&yMW@`~D9o);_xT zMkPwPqkHpeZBuL4+8rO_6SBhvp)sz27zhM8$3I-b@K9`q~j50Nh#BXUu{yO1th2S z707kfr`xHh`zHg|*`Pf78I1$}Sg0i!p)nwfVRKz)Hy3U{-KvY*eQ|LgEU88N5t`u5 zGdB&-_g-OM|D7AUFj9{iZ=_K_5kI99=;=g_k@{u2$m3pyacZGGpDXT5wNE8GpEa3W z!GiWOy;Bh!y^EW|byMW82jyZEhea!*Qxj|nqd8LkHtlxjbu%xk$Xr4e{HDTjj zD_@T+!)$k(h%52>-kylt4gjfg-4I7KhiU_n(Ex%$eZ`P%F1(#b02xdgVJItd> z;o>vCDUQr`A?Fe=j3GI$WiMnL5lQbFXY6e=xGE4uJPIeQp*=iJt38Nd%0-8WE}#ij zv)|XR&7;5uB7)W026B|M=OK7#WU0U-o*vfAD;_hEJ?(zV*ua#v0!er~ z44#7n^N!APTw+yzt3jgK9fQ$x-wF!S+2rmACdD*AP;%P;fT^FwBr1*_7B{8)CTfm7 zFC{tfLzaQpP6qXXZoCf6h7zm!0wVM16J_GZYiPco)MId)qS?d{tZEgy;|%e{CLIGC z7In~FOT7y`WwFusyrOkQm%I^@`Vq8f)eaL3&d)2q{uCVV7@pV2PRln@WoX59F%87m4 zyx&^aPgLlteMD8ENL*hFaV;1z7*Fe%Z2B#GP?>}#m!D7SG}qwGq#TJwRo+T3eJ}=d zXeDi~+-|rd%88bH#dQ}zuIMpkhNNZ~>E{`|F@3_j47%M$qLXph#b?937ue12H2J#Sj4%etawL6#j|24GCtT$)Dt%L&AVUAqV;WT(_m{@61u zrBH721;-OtwM_4P@1v)kVY#jE3MsiMYk>1UkQTu_aN5krT>s88`-s;k-Dl_I$c|$z zSdprkvU2HC8S-kcHcWp>?M;6Qh9S#L%zu*kuSG}-ML_UxurP~&IWZA08wNNZ8UD*n zU|6UM1iA%zJQu&EwhGECD(e`YM13k1qx z5xB&EW4%%KC_mF8k0IlZEhN4%NYa- zVWYKARfWtzv_&pqk@*<}tq(T*m46u76T?wLQGnLhRsNixG7zg^Ghvo8gAzW)T9GfL zE9M{Hu3!=qvB7dGEK^On<@?09jZcFc8atM9KW;=w=YJa+8GEWT#}IZadrE#}=h?Ba z`Oqvujt~5F7|Fc?XG7@RD(qtQQwuDhvmj4V1cTO`v(DD*ym5PR%Xn}UsF;DYR~SH% zJ_f`Oo01{H-ctIy#ba;E@R!%M!z$kz@k}`gv6w#NA#Dr z)#;IMvu8_|{PDUpRa8p99=_;Ed>xX`A8^WcY&7H@RU`Jgj8g|B#BAt%9AZwH{|=-j zDs%{Egz7613!oDmn|lLU_aB<#JCj+u)zUGJgpR-`ts=(XEyNVOSqHD%X0xVm;n^)O zD?@~NXTxbl_<4oZ(8EOh3b$5k)>jPmDMf&W3{gKa{+>huz3b6h@qmQxF_$+ zz!RuzC%@+Utv?bwz1&CYo&fCTn20r2(ioZS&dE%cU0)V1$3afo|&Yvh39%-Sq6Cd?bsFQXfT-`-pZHe%)QZK%X8pwDnWZtQdb(x zAMSHN=Dst`<UjCh^901;#Or@ zz1eiE<2uKF^AbZ0nIe0Jg1)?o5YZ%Ouc(ut5WYw@4?}A3HKd0#p2Py8e#YM374S9P z6aNEZ7bipA;I>lv*MfXD0AAbK@h%7}s$RN$Y`E)Ak*PXkr$e|0EtO-OfhlzkDHAn* z#pXzsme%NOZxE)Mi*zuXRl=8#0Y3yE=fhAUfk3YoEi+l-A- zUIXE-;-xrNxDLg{YB+=&eJY&7A zTA>;yus5mZk8G4ec6McEYf2} zihMRTj`O8F)9IjYviuVR9*7#jJ_BaR8yZML8bm|XGXUtb=gTBcWkQ%yrHYPItQi!O znDqQeoM|^>L4SW|Tj`|=R4;zw99q+?L9Euws$(NdgZeBEOqWLWp!SlJ`#w7>%*qrZ zb4V(M@~BlLJs{1^ilqOHpgu_2zzkE+{4rFDNkD@9_RywoKZVw|{SF8pZ0~D#J}b=k z#3G@?BA9jiR zlp_q}HBURvm5Nr4YZZ-<8p@BowEv*`qg_b$?D_$325BKoc15Wy^K z035g(rQ(RAUdvyZH;=GJOmPyLLK7ke?=fMULHf}*)Z|cUI-Pb-#EKmO=w+NUr{w0z z)qS!sttXC}arXraYRIUp(`OXfMkS=StT;~sA24`&f?iTy|{K<>G}W3 z#`dQHuCq@*HLSo)QzHHsEd*3-Z$p?S3ylpgK?WR|mfPivj*4*T#)pO-PjfO29n24L zMH+ga_>nVU7ECF!)}$Ik$+nwn346sny7RO-5uI|188`-p2foc2qLL^vt=HF6X5at( z3k_hjqZUo*lS(l#Y!z)It;VREI67kkU8~^cJlO62HP?_@(xSM&=Q2$P`iRI%#WOiP 
z`+GV$(wN2dG(VEmoAO0W1?~XrUKTd$u0^D*)gF9n(7eoyO48SQtt+w&=_Adg8wLB6 zFx)CSM?MO<3dw+PAS><&xT)(aUg?BH3-v+=8bMNJ4h8lvzlq0Yy;vkIr{jJZGs6eS zy;^w`#?h?tsf~m?Di$Nl9xrocdj$I&(O-36ZNes&?Wf+8hZ<~5y!VU%&T{gItHNSc z7_Ao$6Q3nal?nSq@!uD(5?1&Czq3t8zvp)dxk=$k<_UM#arEANS{o0qO+W^OjMWEI zmK$FK;dm$8)Ne;QQEAsQ7g`JSf{pSGcqNR!@W5y3L9KHK8iwj}txsp)r!WS{P#P-))ZL%lP{i+6YfMdx3jxSVrSY|r^q}dd15+xNWbrs1h{CLxeLqwA zk|WR&^!4FDG64~;*u1(~#`F|TLtJXEhp?1Ui7R(H1JLa4(QY?v**Alj#nU&4rf*nH zkuAvZlaIU9Ayb!jT}1a4&G>s5(vAPzLV{VNWEVe^rlLD_bt9L0s66YvkJdFv#vOHy z`MLbL0r`Qkoc%2W@}ALq)M0S7BqssC16u^V;x8|R9!sM}P_K3`6hpW(oSD~QJkHJEA2Dkwsz~tKs=$2Cf+VaA0@OV` zD3mUEs_Gy-+=~$T!64=FjzV2Js-dCM`@5j>%VxKjacDMoM_h!UE$R@o)V)$y?qvRB zR*n>qHn4U4NLVv164sC8X>0FMqA0C6fvC9aVEIuhdbMm9e!j~;k;P+3v!8dKomA(_ zqGiYSaeH>qP~Dny)6|jvrWp1u(qu|CAum#04C$1J(xu^LN8C~O_80Vt;hDf81uDf> zLpTKvX>A%8bmHL@DWv4*w8!s^|*7ESe%gLLB^5KL7QW$Z+=DPU+Jx%3m z9JZV|mWeel7lLYymo`QbJoq{kmCULWhKP$l&*z=?le(*+N#ATdqx5h}=a1`Trbk}_ zg^Qsec4ef$o%`iS+B)>{gDKC-?lNUCBzq?1t)<;RU3_W4$fR+6nw4$&&qfCwt7l(CkvMQp;8jP&!RW6_BNlpv1y}>!cN_ zsssW_N5hD%468brT(uEIq4~`A- zk{ws*y)0lq{5;|vI|pnbIh*0d31`7x{4SX{Yumm|PcbF(dmp2#wXgbDaFpCtg7vI% zWsnm`E3J%&HpO&P+UuamyUhnG>WcOegT*+UJpo9Ta$Cs*aH3Uu?e#Kb^TP_ghnc%G z+8G+1&g_NjiKOSR)*kBR)s*2;x<5k?V~62oJ#<7B%ePx+#w7oCi4v|0N8KLgxuGHS z6(3p)fGnWb73V}~U0~QKcv2Gtl0>X-l!U+74E69}aVyiCeum!f9v7?|on`K?)LbC~ z3@4axsRE|r5WqsK4`Y=!?0lZjZ;2~o#nMY3cfUc|Rg~UU9sUFO;x2Qov-I;#Fk1Um zw4-aT?X=?3k^Ar`eD`ZrNv)qG2%-^0DH?0$q=_<&PY6kt&#;Utz&zxg48N?mV=~IA z8osquP`$-?JLb-FS{ioOW`e0wd&D5hJ5j2b-=lDzu_QO^7mefOl!!l-T6IXyA>%21 z{ZRv{NXtbI({Om%d<&N7uanR)kmPNe6v5&vRUk()nQ+mwC=9ov*Bs2mJDvn*!P``u z8|sc&$aeUtA!x=TC=5=Xe-0SREf$}@O+CQSn(o*w#k6FI z%$Ck74Q$Tl^1!UxAq6vBH>@%D8mP55Z=Tp-FB(?dI|-*Lx><(ESPXAd)|vzPeLgB% zz#k7MdWNgWbyS}f$Y@!*#c!-LoaaSsk{cB1lauj~Nk(uq=adpvwR#BZZrC|^$^Z^9 z84fcK+0{r#n%~$T*)M>`ggL20B#*)?vfFIl+$vm2j*9tyO5Dm&jS1%v0nauOTt%Z~ zfeYc81ynf@^;X=zpnWGh9e-(Jff2OD^hQe)gs)LbZHSt4C)v-?B;Wjbz1*(d^B zwajMI8biE$l`CEzlT2kNv&)p?YxM)mj@`Fnm4ac)_KV$@#FjUc5$5y{fa27|V*_cS z(pbGkxu%xF+8g_xA`TyCY|By1z`!AVcCBJ{K0=Cr59ib4i%Bxel6E26rY-Ml@r}A& zD7S{G--ioT)%^>UgIgvzLN!AK zv@1`pMg=D(N6R0quhx&~*Yg&VFu6w~N%n~QzE15#slJ8zN08&h%cV7z27XEVrMhBaMG{CAYC$#?+rWY z_oDJ!%v17g{Y%x`tilO^+~LZz0>ws;^a*_`cBJeQX(2q$^t1IDV*#`m=0lORTbd6= zjNELD+{H-e$b#9uO_7=GUS!3VlRfxz^LXUgZI(EWO>M<)BP<)!6S=}ArJj-kvNKu^ zoGr4H?~$iYdP05KRc7Ko7iRuQJZNKm@F*pCdT(bbB7M=|&zFkPgi4n2Czh!sA}73O zZtl8_DBT8JD)Xhsbz|_E>fyWDZX^GaEznsWES>oS@x&OXjn_?Zi?>3taQ^BLNb;+f*G+wqk^CST}bCTndn;j~Ys*4ozA-mxi3fTHp zK}L=|YPezYODxXWuNHNljcTI!LIP3#b5Q4aE6*KU+NL6 zahU%~27u4*?mhWVA( z6`OBy4oTFLnP|1vUHGCXELb!G%c++q!$EFh+t4G!sIL=GDx9fZ%i-bMUvP&A0WOy2 z><*=-$=d*dDgLE~T^#vJ%k9#+h3E+>SvOQBpqLOd(b(7^|u6GC{a` z{I>#jXFCng!;X`{0_>%=($Ki^{ESE=TrBzisu^YW-$~hDb*Z;Q0Mu**pa?9~4R8JM z&;cQbcDzMCNjeKED7WM!zt-g1i8M1hJ}o=!Ouw-BE5}5k9D2k6{GU`y4(aOhJ8mzj zv@||vT5rvpTKQmonO~ zCghf#K9#l4exI$J=|V7!HN^Gc)B2YzQWeY;;O{%1X5CRZq)>3w<>Y^AGNO4$CTAgdHHjcGIflk^ zU1a6T{cd~l$*`EOpO^7;Zb+i%bE1{W9V z4kI)*T5N)?63U&Bo|7p{?Mvn?LY@mqQZhlAG(~>yO3#A>UFShHjmzoc3OJtfpJFwVrW5>otp-D7t@4d6$tMZ|B`5Ol7 zjqm&j28$0^WN`dxRYZ8bd`0JXZ_{fm&vd@K>KoH(DxiRI1X z*i0a9S$Q%;NgQ6YXxe0JpaOEk&c3l70BlT(cuVVnrJ5ck1V?7gR$SL|Sb*gSO!b1u zMx-1GP@GPb$wE7uAxo~3pTC6JwtJYa z=|l)a(hgzwNN%HgY4zy}xf^4y0GY-5hzrI=5eJn&?JY@uLQ)ePadLnj(;X(TP^U2($XL~QuP+O?c}83IPDH{~a)A6*UaV1C8kkIIPB6Nl zC6T!tG6ng=|Ay*_k^%7g>$RBYlHwj5fD7yaw=fuM^3b?w@~0q)f6@7 z!2a*95AkAO;KNDD##mOIWC9E4S)LjQNAs%VXu(A@{NuAM@O6<`4Tu$F!$K-IgFJ{Ks0~Ls!Z~czZ*6`~U`3JQ z+mg0>_ zToJx4-l2oJgCx|vZ6|JZMOg)fu$E)_u5hvWL^CTbr7~FXv$$C8^x}#Gwf90YCPaQQEQB(ncDC+`^{DABV zl4<2Pc^JiOQ6u#?Xt*X3(eQY 
zNC2<91!%9byx?2$v60M}V}`u*+PS`xhpTnF65SC|v(zBu`VtaP~#4hYbAjp9CpqIxjQFce_Ar*5lr6g28RKIoO{qCzJWJK#) zC6{TPMDb(T8go#Cft1m;PGh82Q5yP;ps1iqMey+~LMx6J9iGv)sG8OLUW8Bj14q#$ z$yuCNk%PHfmmC5hhQ*OBWz&VyhOA@@@aeWZba+OH?16=1!%kB&BR=Xjmj?~q0;N^D zBSidGJYeBt7~!8{j@!>x^-1x6PrUC~&j|0H$e5Q~b`D0j8jZYH8zrSM!}6{KYs#I! zgo5A^vXHv*Xez!%9ves^DQ877!@S#F$-5E99ZXWaqnB@>76p-hvdaZb6`TE*rvOL`RKnZk2K8~fz@nFII}ml#-WipDvLaS zj2q9fA%tmsIO()G&fh!u0j)ff9yK{Y?s!KDA`NJW39SgVp$BNRVlQC)SB`@llH1t0 zQIH*K@7Q!Nv&m79+KDUyTTN^?Dd=7KvIdwI&TcQ3VfLucUOlTrZjlWwF*Da)|77!L z!-Sm7ny^9|N)MRbqqKd`RbGa?8%0rxn9`Y^i5JWM`y-WHWymiQ^|z6U)ykzD=t-gp z-%PMXsUjN_bR`l2GL)iOcLu>FxRgE#RysA2+3#of$Rp=lcgx8Up#srZ%zQRCmqZ z?qEI`UzAPbb_C@_mjR~jW$vp|*X$w%%a7aHm(je75`_Um!owkXR|+9M6cQUzAF~Wt z#sp*6UC%Dm7qN1@uBV5o8fd0@9$fz-Yez8hc&xl?*5aP&h(TNp_-?f>uu%(R2^i78 zO@vLttL;?PEyke9ruIjpqf&h|g7K2sjJ#v6d9n#b1@ zkptbBY00nygxwXCer+*|lPRN`lt<`0i{}`7-UvcBe`-W7F@_!DCD|m#toP@ zdrR@=B2UX=Cz+OfgKzHM&zP$cs#bi^@lPW7Nhjr8S8X7=5cBN zMukcavR8%9il!@|T-4vF9?$h6?MpWtc8vhv8eW#BR?f81Vf?m|pJ{yPzQ}=~o9jJo zcC^a-L6-rm>AdvA^vitj3&X~NTZ^=mjg|Fq3xvZXb`NgKN}Y_s zWt<6jsE{rG#zdUsWP$!|)Vq|v%_2!$Tnc3$jz?z3_K>G>Vgx;R1H~NfmSkM-E1o1P zZ8MiG7xkcYQe&9(D*JXcQiO>&DOl1_MBfZT>xJr4b?PIN-sj<}kp|kfu)c+f&2#cL zha`Ibhfk8hyqZawaw?^H)=Mgk8N@pzl+wy9sq}z?yHA(gnt-<|Ao5uiTX_92N!GQ_ zUL-^Tc*%3no(@1)f9M~8)I{7`sDEG#{F zKXpZgxHQ+zytrx9(xP41w_bxfH5yKUb&$_*hGis zJk&2!SzTtHx$VFyDTE`e&?1COS0uD^smu&_H6h4Yrr}Z3*s}Oix+Ghhl;o?>PhXKi zMDPb0?qzNWXU$rNUZs@S?$8u5mIe>vh(Rl8_Mb+M7`k8;q4UG_JXA^4GPf!LHrg!p z*{yiX#ReE=r4~sE7iN6Dl11Zv=6?~>d+~8@~fXx^;mjDT~wFxj} zjQ2&Fa$O5%+DqNqhFHmpdobk9Gx*zOLH__EzZYIEI29mYvbCw3jUvpMj`Kk%2TGU< zO#594PdA1fL^&`7tr#kj3-+|{N+%YUPo9p2DzdV)8boEo&8S%t>35N_#e{Q|(leK3=?7nBs7GaY6I&O30%}V(>^QFcxswfLa4rMw^6tc}i*ad@0 z0lZ%O(L2DHCIhp%z{eNosD&K~b}w;Wuh2f65`pBJWr{jSgDQ2S93F%r2@;a5EyR;0 zuk7VXWN?UXVI1l*b4zEr+KIEo5OI*J5!G$&#A8~0=!YmbbD5~bQoK#*kcbHa3&@%6 zOq_lMNnvo~mj^zf!-E3B|Dk@eY*5 zJ`V~0xz)9c0^8*4(W9X{9E8(=LTExpotK?>bl#CT7(|4nJp&Nbb0pArq|zXf^Oz@{ zAOa*JNR*mD0atho)NINti7M{AiL~av>u$UTX(^!|So|?a0GMeH%=s!2vZMv5mU=MM zsD!dlKX9e}AXrSFg}r_*yjoG7N*|2Sq|p2XR;TV=rLvK{9K*LJk80}uC;{gaPIzHW zS{aNTT`MGq{e}fiGg&t~vD%il;Z&^z*sFmThaXKVNT3??cj2hnmU#xP6nEKh^4aT2 zOdtYO(8(flAMa5QZY4^UH;4hghu?z7xG4~#IBQ=3MI;0v*e!67^PQ2x?h)tr=R(M%q-bX}kJg7JHLQ-J> z2K&V9uM?gGYDgh2Onayz-vL*s z&qF6VA7O2B>8Z@G#n*tqlY~(H_NY7^t3tq>^{hC&TEmz_1J{ToHk2K;Y5~X5 z$3sp4gJfVkwFJ0hLD0<*tzuRi1`0weorde7Lf|FR+?zN%&7KOE2JnTt=|mtF`wd2k z2Ja_r_!Av)uVz$2dZ`u54+y3r>{2U4Lj31Xjoap`=a| zBU4&7Zo5@wO>3@l(cXa>E+ZpAAR}aTsnIR$zv`jr(A{_7>5Y|*(#X>uSf;Us0G|8| zQI@1P&p2&bo!t6~Dg^E-grlpa8KV?q!Ie7EkKuodVmBN`OCY7Y6kV;c47cmgH3!Hm zud0DVtE<)F%f!iaDp8)(&+3sm8C8{_sS8gb)#r)}_sf;tYX1O3D)Jn29Uf&T8%oT+ z9WO|v5g11$Z_H(9sfMEHP<#C+6&J}S`U9=%Kg#;2Itwa*APx>I4lfplp4RQ7$h($iY~# zNso1jdKqd|a6t!5p7B|{EjZ~5aAZil7Wp4788~IiZo2XKsbhmz1hW;-TI5pTB-tne z#i5^E(O@0#de|MGbT^|9nvQ@w&FE1phwRq5709L~(>a<_(~8x=wUdDs#(|n~jOZBP zKyhzz?WbyUjc-Ama{Q8bdI70x8`YH257m#s|orAM7@*NM;YxbPZJxW=Io)K8I78v*oQqrL7u z=}}HZJ!&mBhq-E;5wE#%*r!YxLiOMZQg^m{>)M?VcaG;Tlfii6x$!ktD=sQ^Tg`H+ z9!e7z#r^mkju#@iHOZmcxim|d^(fnC(wuQHtlS*XBqWaYB(mE|Rzbk#>%~1yY8V4Q z^4f;EY5TE2-msEaW8jZ5Q!6$k^Qo8ts-H~M!22QKGg?uP22|0H33dy$325q&denLr z!c_vOwNvSeG+}I8O+uJi!$!=Tiu5<}VzyzyNxm)R6w0EdVQatH?3x8Pv zfQ^x+UATmk&mSnQVYF%vc^%FWg>ZuHmeSgqYxbwA>Jr!n8bzpj+eAWZp zeW_REPW#o#?#HD>GEGFiYEnmV3YP_LuS%LvppTW>9bMAzn>6X~RrhgZa&nQ#%U6aM zwhQj>M+g=Gfa38G9q&$fE-Gs+3rHbz^E56i-2VVUK|{9#k>CV)EtmAe*)rURQlA)BP)!j% z4H}s?L&uvHDYNB1MNEmgKT1_dtE%v|2$W96PN}{`=&{|S1PuBXhi~nKt#o4S&w7qy zkRj*~lAK0A?!$|UkQmhm(yUO8BKplk5&93JQQ+o^dzB5_ACRwFqvC5q^bd-_msLI} 
diff --git a/website/index.html b/website/index.html
index 859a3b5d234..8770c8a97a9 100644
--- a/website/index.html
+++ b/website/index.html
@@ -1,7 +1,5 @@
 {% extends "templates/base.html" %}
 
-{% set extrajs = ["js/index.js"] %}
-
 {% block content %}
 
 {% include "templates/index/nav.html" %}
diff --git a/website/js/base.js b/website/js/base.js
index 1722d2ebc2f..17f4c98b39b 100644
--- a/website/js/base.js
+++ b/website/js/base.js
@@ -58,3 +58,5 @@ $(document).ready(function () {
         } else { f(); }
     })(document, window, "yandex_metrika_callbacks2");
 });
+
+{% include "js/index.js" %}
diff --git a/website/templates/footer.html b/website/templates/footer.html
index 7cca9a48d90..d69f866b4dd 100644
--- a/website/templates/footer.html
+++ b/website/templates/footer.html
@@ -3,8 +3,8 @@

- ClickHouse source code is published under Apache 2.0 License. Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ rel="external nofollow" target="_blank" class="text-reset">
+ ClickHouse source code is published under the Apache 2.0 License. Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.
diff --git a/website/templates/index/community.html b/website/templates/index/community.html
index ede5f84e883..ae933331700 100644
--- a/website/templates/index/community.html
+++ b/website/templates/index/community.html
@@ -4,8 +4,8 @@

ClickHouse Community

-
-
+
+
-
+
diff --git a/website/templates/index/efficient.html b/website/templates/index/efficient.html
index da21ce4e03c..275e0bf1c00 100644
--- a/website/templates/index/efficient.html
+++ b/website/templates/index/efficient.html
@@ -4,11 +4,12 @@

Hardware efficient

ClickHouse processes typical analytical queries two to three orders of magnitude faster than traditional
- row-oriented systems with the same available I/O throughput and CPU capacity. The system's columnar storage format allows fitting
- more hot data in RAM, which leads to a shorter typical response times.

+ row-oriented systems with the same available I/O throughput and CPU capacity. Columnar storage format allows fitting
+ more hot data in RAM, which leads to shorter typical response times.

+

Total cost of ownership could be further lowered by using commodity hardware with rotating disk drives instead of enterprise grade NVMe or SSD without significant sacrifices in latency for most kinds of queries.

-
- ClickHouse is hardware efficient +
+ ClickHouse is hardware efficient
@@ -20,7 +21,7 @@

Optimizes disk drive access

-

ClickHouse minimizes the number of seeks for range queries, which increases efficiency of using rotational +

ClickHouse minimizes the number of seeks for range queries, which increases the efficiency of using rotational disk drives, as it maintains locality of reference for continually stored data.

diff --git a/website/templates/index/features.html b/website/templates/index/features.html
index c7ac56c8b09..30d682843b0 100644
--- a/website/templates/index/features.html
+++ b/website/templates/index/features.html
@@ -11,8 +11,8 @@
  • True column-oriented storage
  • Vectorized query execution
  • Parallel and distributed query execution
  • -
  • Real time query processing
  • -
  • Real time data ingestion
  • +
  • Real-time query processing
  • +
  • Real-time data ingestion
  • On-disk locality of reference
  • Data compression
  • @@ -40,11 +40,11 @@
    -
    -
    +
    + - diff --git a/website/templates/index/hero.html b/website/templates/index/hero.html index 35eba0c4ce8..5166d303ea8 100644 --- a/website/templates/index/hero.html +++ b/website/templates/index/hero.html @@ -1,21 +1,21 @@
    -
    +

    ClickHouse is a fast open-source database management system

    It is column-oriented and allows to generate analytical reports using SQL queries in real-time.

    @@ -40,7 +40,7 @@
    -
    +
    ClickHouse is capable of generating analytical data reports in real time
    diff --git a/website/templates/index/nav.html b/website/templates/index/nav.html index fa8fe780654..6d851aff2dd 100644 --- a/website/templates/index/nav.html +++ b/website/templates/index/nav.html @@ -4,8 +4,8 @@ ClickHouse -
    diff --git a/website/templates/index/reliable.html b/website/templates/index/reliable.html index 0dad88e4021..c7a7146050d 100644 --- a/website/templates/index/reliable.html +++ b/website/templates/index/reliable.html @@ -6,13 +6,13 @@

ClickHouse has been managing petabytes of data serving a number of highload mass audience services of Yandex, Russia's
- leading search provider and one of largest European IT companies.
+ leading search provider and one of the largest European IT companies.
Since 2012, ClickHouse has been providing robust database management for the company's web analytics service, comparison e-commerce platform, public email service, online advertising platform, business intelligence tools and infrastructure monitoring.

    -

    ClickHouse can be configured as purely distributed system located on independent nodes, +

    ClickHouse can be configured as a purely distributed system located on independent nodes, without any single points of failure.

    Software and hardware failures or misconfigurations do not result in loss of data. Instead of deleting "broken" @@ -22,6 +22,6 @@

    ClickHouse offers flexible limits on query complexity and resource usage, which can be fine-tuned with settings. It is possible to simultaneously serve both a number of high priority low-latency requests and some - long-running queries with background priority.

    + long-running queries with a background priority.

    diff --git a/website/templates/index/rich.html b/website/templates/index/rich.html index a7091831018..14b2f86e75f 100644 --- a/website/templates/index/rich.html +++ b/website/templates/index/rich.html @@ -3,14 +3,14 @@
    -

    Feature rich

    +

    Feature-rich

    -
    +
    1

    User-friendly SQL dialect

    -

    ClickHouse features a user-friendly SQL query dialect with a number of built-in analytics capabilities. In additional to common functions that could by found in most DBMS, ClickHouse comes with a lot of domain-specific functions and features out of the box.

    +

    ClickHouse features a user-friendly SQL query dialect with a number of built-in analytics capabilities. In addition to common functions that could be found in most DBMS, ClickHouse comes with a lot of domain-specific functions and features out of the box.

    2
    @@ -22,13 +22,13 @@
    3

    Join distributed or co-located data

    -

    ClickHouse provides various options for joining tables. Joins could be either cluster local, they can also access data stored in external systems. There's also a external dictionaries support that provide alternative more simple syntax for accessing data from outside source.

    +

    ClickHouse provides various options for joining tables. Joins could be either cluster local, they can also access data stored in external systems. There's also an external dictionaries support that provides an alternative more simple syntax for accessing data from an outside source.

    4

    Approximate query processing

    -

    Users can control trade-off between result accuracy and query execution time, which is handy when dealing with multiple terabytes or petabytes of data. ClickHouse also provides probabilistic data structures for fast and memory-efficient calculation of cardinatilities and quantiles

    +

    Users can control the trade-off between result accuracy and query execution time, which is handy when dealing with multiple terabytes or petabytes of data. ClickHouse also provides probabilistic data structures for fast and memory-efficient calculation of cardinalities and quantiles

    diff --git a/website/templates/index/success.html b/website/templates/index/success.html index be14ab33e58..cd22fe3b1da 100644 --- a/website/templates/index/success.html +++ b/website/templates/index/success.html @@ -3,7 +3,7 @@

    Success stories

    - +
    - +
    - +

    Experimentation

    @@ -35,7 +35,7 @@
    - +
    - +
    - +

    Speeding up Report API

    diff --git a/website/templates/index/use.html b/website/templates/index/use.html index 3c53a54b1ff..edf4a28cf67 100644 --- a/website/templates/index/use.html +++ b/website/templates/index/use.html @@ -3,7 +3,7 @@

    When to use ClickHouse

    -

    For analytics over stream of clean, well structured and immutable events or logs. +

    For analytics over a stream of clean, well structured and immutable events or logs. It is recommended to put each such stream into a single wide fact table with pre-joined dimensions.

    @@ -32,7 +32,7 @@
    • ✕ Transactional workloads (OLTP)
    • -
    • ✕ Key-value requests with high rate
    • +
    • ✕ Key-value requests with a high rate
    diff --git a/website/templates/index/why.html b/website/templates/index/why.html index 95273ac48e0..291df2c6653 100644 --- a/website/templates/index/why.html +++ b/website/templates/index/why.html @@ -18,8 +18,8 @@ Fault tolerant
    -

    Fault tolerant

    -

    ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows to avoid having single points of failure. Downtime of a single node or the whole datacenter won't affect the system's availability for both reads and writes.

    +

    Fault-tolerant

    +

    ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter won't affect the system's availability for both reads and writes.

    @@ -28,15 +28,14 @@

    Linearly scalable

    -

    ClickHouse scales well both vertically and horizontally. ClickHouse is easily adaptable to perform either on - cluster with hundreds or thousands of nodes, or on a single server or even on a tiny virtual machine. Currently there are installations with more multiple trillion rows or hundreds of terabytes of data per single node.

    +

    ClickHouse scales well both vertically and horizontally. ClickHouse is easily adaptable to perform either on a cluster with hundreds or thousands of nodes or on a single server or even on a tiny virtual machine. Currently, there are installations with more multiple trillion rows or hundreds of terabytes of data per single node.

    Easy to use

    Easy to use

    -

    ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows to express the desired result without involving any custom non-standard API that could be found in some DBMS.

    +

    ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some DBMS.

From 3b2c83b1664414a9ca8d15d8aa5da5b793c1deaf Mon Sep 17 00:00:00 2001
From: "imgbot[bot]" <31301654+imgbot[bot]@users.noreply.github.com>
Date: Wed, 18 Mar 2020 16:17:23 +0300
Subject: [PATCH 056/115] [ImgBot] Optimize images (#9728)

*Total -- 48.84kb -> 48.77kb (0.16%)

/website/images/index/hardware-efficient.svg -- 44.52kb -> 44.44kb (0.17%)
/website/images/clickhouse-black.svg -- 4.33kb -> 4.33kb (0.02%)

Signed-off-by: ImgBotApp
Co-authored-by: ImgBotApp
---
 website/images/clickhouse-black.svg         | 2 +-
 website/images/index/hardware-efficient.svg | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg
index 695d0175685..a0a607dc0b2 100644
--- a/website/images/clickhouse-black.svg
+++ b/website/images/clickhouse-black.svg
@@ -1 +1 @@
-ClickHouse
+ClickHouse
\ No newline at end of file
diff --git a/website/images/index/hardware-efficient.svg b/website/images/index/hardware-efficient.svg
index 45988007f49..f7f06d5a962 100644
--- a/website/images/index/hardware-efficient.svg
+++ b/website/images/index/hardware-efficient.svg
@@ -1 +1 @@
-215
+215
\ No newline at end of file

From d4e274195176f9719931b974364ef765e458e716 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 18 Mar 2020 17:55:01 +0300
Subject: [PATCH 057/115] More grammar fixes in docs/website (#9731)

---
 .../aggregatefunction.md                       |  4 +-
 docs/en/data_types/special_data_types/index.md |  2 +-
 .../data_types/special_data_types/interval.md  |  2 +-
 .../data_types/special_data_types/nothing.md   |  1 -
 docs/en/data_types/special_data_types/set.md   |  2 +-
 docs/en/development/architecture.md            | 52 +++++++++----------
 docs/en/development/browse_code.md             |  2 +-
 docs/en/development/build.md                   |  8 +--
 docs/en/development/build_cross_osx.md         |  6 +--
 docs/en/development/developer_instruction.md   | 52 +++++++++----------
 website/images/clickhouse-black.svg            |  2 +-
 11 files changed, 66 insertions(+), 67 deletions(-)

diff --git a/docs/en/data_types/nested_data_structures/aggregatefunction.md b/docs/en/data_types/nested_data_structures/aggregatefunction.md
index f6f86ed37ef..36b18167164 100644
--- a/docs/en/data_types/nested_data_structures/aggregatefunction.md
+++ b/docs/en/data_types/nested_data_structures/aggregatefunction.md
@@ -38,9 +38,9 @@ uniqState(UserID)
 quantilesState(0.5, 0.9)(SendTiming)
 ```
 
-In contrast to the corresponding functions `uniq` and `quantiles`, `-State`- functions return the state, instead the final value. In other words, they return a value of `AggregateFunction` type.
+In contrast to the corresponding functions `uniq` and `quantiles`, `-State`- functions return the state, instead of the final value. In other words, they return a value of `AggregateFunction` type.
 
-In the results of `SELECT` query the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query.
+In the results of `SELECT` query, the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query.
 
 ### Data Selection
diff --git a/docs/en/data_types/special_data_types/index.md b/docs/en/data_types/special_data_types/index.md
index 14c2406edc0..30ab4d6f061 100644
--- a/docs/en/data_types/special_data_types/index.md
+++ b/docs/en/data_types/special_data_types/index.md
@@ -1,6 +1,6 @@
 # Special Data Types
 
-Special data type values can't be saved to a table or output in results, but are used as the intermediate result of running a query.
+Special data type values can't be saved to a table or output in results, but can be used as the intermediate result when running a query.
 
 [Original article](https://clickhouse.tech/docs/en/data_types/special_data_types/)
diff --git a/docs/en/data_types/special_data_types/interval.md b/docs/en/data_types/special_data_types/interval.md
index 8a8f507ccc3..805a5e78b85 100644
--- a/docs/en/data_types/special_data_types/interval.md
+++ b/docs/en/data_types/special_data_types/interval.md
@@ -3,7 +3,7 @@ The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../query_language/operators.md#operator-interval) operator.
 
 !!! warning "Warning"
-    You can't use `Interval` data types for storing values in tables.
+    `Interval` data type values can't be stored in tables.
 
 Structure:
diff --git a/docs/en/data_types/special_data_types/nothing.md b/docs/en/data_types/special_data_types/nothing.md
index 3540ecf861d..75c09d90aec 100644
--- a/docs/en/data_types/special_data_types/nothing.md
+++ b/docs/en/data_types/special_data_types/nothing.md
@@ -1,4 +1,3 @@
-
 # Nothing
 
 The only purpose of this data type is to represent cases where value is not expected. So you can't create a `Nothing` type value.
diff --git a/docs/en/data_types/special_data_types/set.md b/docs/en/data_types/special_data_types/set.md
index 560a4b4d01f..2311e55df8f 100644
--- a/docs/en/data_types/special_data_types/set.md
+++ b/docs/en/data_types/special_data_types/set.md
@@ -1,6 +1,6 @@
 # Set
 
-Used for the right half of an IN expression.
+Used for the right half of an [IN](../../query_language/select.md##select-in-operators) expression.
 
 [Original article](https://clickhouse.tech/docs/en/data_types/special_data_types/set/)
diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md
index 28ddd71f4a2..58601739846 100644
--- a/docs/en/development/architecture.md
+++ b/docs/en/development/architecture.md
@@ -1,20 +1,20 @@
 # Overview of ClickHouse Architecture
 
-ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. This is called "vectorized query execution," and it helps lower the cost of actual data processing.
+ClickHouse is a true column-oriented DBMS. Data is stored by columns and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. This is called "vectorized query execution," and it helps lower the cost of actual data processing.
 
-> This idea is nothing new. It dates back to the `APL` programming language and its descendants: `A +`, `J`, `K`, and `Q`. Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `Vectorwise` system.
+> This idea is nothing new. It dates back to the `APL` programming language and its descendants: `A +`, `J`, `K`, and `Q`. Array programming is used in the scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `Vectorwise` system.
 
-There are two different approaches for speeding up the query processing: vectorized query execution and runtime code generation. In the latter, the code is generated for every kind of query on the fly, removing all indirection and dynamic dispatch. Neither of these approaches is strictly better than the other. Runtime code generation can be better when it fuses many operations together, thus fully utilizing CPU execution units and the pipeline. Vectorized query execution can be less practical, because it involves temporary vectors that must be written to the cache and read back. If the temporary data does not fit in the L2 cache, this becomes an issue. But vectorized query execution more easily utilizes the SIMD capabilities of the CPU. A [research paper](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf) written by our friends shows that it is better to combine both approaches. ClickHouse uses vectorized query execution and has limited initial support for runtime code generation.
+There are two different approaches for speeding up query processing: vectorized query execution and runtime code generation. In the latter, the code is generated for every kind of query on the fly, removing all indirection and dynamic dispatch. Neither of these approaches is strictly better than the other. Runtime code generation can be better when it fuses many operations together, thus fully utilizing CPU execution units and the pipeline. Vectorized query execution can be less practical because it involves temporary vectors that must be written to the cache and read back. If the temporary data does not fit in the L2 cache, this becomes an issue. But vectorized query execution more easily utilizes the SIMD capabilities of the CPU. A [research paper](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf) written by our friends shows that it is better to combine both approaches. ClickHouse uses vectorized query execution and has limited initial support for runtime code generation.
 
 ## Columns
 
 To represent columns in memory (actually, chunks of columns), the `IColumn` interface is used. This interface provides helper methods for implementation of various relational operators. Almost all operations are immutable: they do not modify the original column, but create a new modified one. For example, the `IColumn :: filter` method accepts a filter byte mask. It is used for the `WHERE` and `HAVING` relational operators. Additional examples: the `IColumn :: permute` method to support `ORDER BY`, the `IColumn :: cut` method to support `LIMIT`, and so on.
 
-Various `IColumn` implementations (`ColumnUInt8`, `ColumnString` and so on) are responsible for the memory layout of columns. Memory layout is usually a contiguous array. For the integer type of columns it is just one contiguous array, like `std :: vector`. For `String` and `Array` columns, it is two vectors: one for all array elements, placed contiguously, and a second one for offsets to the beginning of each array. There is also `ColumnConst` that stores just one value in memory, but looks like a column.
+Various `IColumn` implementations (`ColumnUInt8`, `ColumnString` and so on) are responsible for the memory layout of columns. The memory layout is usually a contiguous array. For the integer type of columns, it is just one contiguous array, like `std :: vector`. For `String` and `Array` columns, it is two vectors: one for all array elements, placed contiguously, and a second one for offsets to the beginning of each array. There is also `ColumnConst` that stores just one value in memory, but looks like a column.
 
 ## Field
 
-Nevertheless, it is possible to work with individual values as well. To represent an individual value, the `Field` is used. `Field` is just a discriminated union of `UInt64`, `Int64`, `Float64`, `String` and `Array`. `IColumn` has the `operator[]` method to get the n-th value as a `Field`, and the `insert` method to append a `Field` to the end of a column. These methods are not very efficient, because they require dealing with temporary `Field` objects representing an individual value. There are more efficient methods, such as `insertFrom`, `insertRangeFrom`, and so on.
+Nevertheless, it is possible to work with individual values as well. To represent an individual value, the `Field` is used. `Field` is just a discriminated union of `UInt64`, `Int64`, `Float64`, `String` and `Array`. `IColumn` has the `operator[]` method to get the n-th value as a `Field` and the `insert` method to append a `Field` to the end of a column. These methods are not very efficient, because they require dealing with temporary `Field` objects representing an individual value. There are more efficient methods, such as `insertFrom`, `insertRangeFrom`, and so on.
 
 `Field` doesn't have enough information about a specific data type for a table. For example, `UInt8`, `UInt16`, `UInt32`, and `UInt64` are all represented as `UInt64` in a `Field`.
 
@@ -33,15 +33,15 @@ Various functions on columns can be implemented in a generic, non-efficient way
 
 `IDataType` only stores metadata. For instance, `DataTypeUInt8` doesn't store anything at all (except vptr) and `DataTypeFixedString` stores just `N` (the size of fixed-size strings).
 
-`IDataType` has helper methods for various data formats. Examples are methods to serialize a value with possible quoting, to serialize a value for JSON, and to serialize a value as part of XML format. There is no direct correspondence to data formats. For example, the different data formats `Pretty` and `TabSeparated` can use the same `serializeTextEscaped` helper method from the `IDataType` interface.
+`IDataType` has helper methods for various data formats. Examples are methods to serialize a value with possible quoting, to serialize a value for JSON, and to serialize a value as part of the XML format. There is no direct correspondence to data formats. For example, the different data formats `Pretty` and `TabSeparated` can use the same `serializeTextEscaped` helper method from the `IDataType` interface.
 
 ## Block
 
 A `Block` is a container that represents a subset (chunk) of a table in memory. It is just a set of triples: `(IColumn, IDataType, column name)`. During query execution, data is processed by `Block`s.
If we have a `Block`, we have data (in the `IColumn` object), we have information about its type (in `IDataType`) that tells us how to deal with that column, and we have the column name (either the original column name from the table or some artificial name assigned for getting temporary results of calculations). -When we calculate some function over columns in a block, we add another column with its result to the block, and we don't touch columns for arguments of the function because operations are immutable. Later, unneeded columns can be removed from the block, but not modified. This is convenient for elimination of common subexpressions. +When we calculate some function over columns in a block, we add another column with its result to the block, and we don't touch columns for arguments of the function because operations are immutable. Later, unneeded columns can be removed from the block, but not modified. This is convenient for the elimination of common subexpressions. -Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header, because small block sizes will have a high overhead of temporary strings for copying shared_ptrs and column names. +Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks and only column data changes. It is better to split block data from the block header because small block sizes will have a high overhead of temporary strings for copying shared_ptrs and column names. ## Block Streams @@ -55,15 +55,15 @@ Streams are responsible for: There are more sophisticated transformations. For example, when you pull from `AggregatingBlockInputStream`, it reads all data from its source, aggregates it, and then returns a stream of aggregated data for you. Another example: `UnionBlockInputStream` accepts many input sources in the constructor and also a number of threads. It launches multiple threads and reads from multiple sources in parallel. -> Block streams use the "pull" approach to control flow: when you pull a block from the first stream, it consequently pulls the required blocks from nested streams, and the entire execution pipeline will work. Neither "pull" nor "push" is the best solution, because control flow is implicit, and that limits implementation of various features like simultaneous execution of multiple queries (merging many pipelines together). This limitation could be overcome with coroutines or just running extra threads that wait for each other. We may have more possibilities if we make control flow explicit: if we locate the logic for passing data from one calculation unit to another outside of those calculation units. Read this [article](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) for more thoughts. +> Block streams use the "pull" approach to control flow: when you pull a block from the first stream, it consequently pulls the required blocks from nested streams, and the entire execution pipeline will work. Neither "pull" nor "push" is the best solution, because control flow is implicit, and that limits the implementation of various features like simultaneous execution of multiple queries (merging many pipelines together). This limitation could be overcome with coroutines or just running extra threads that wait for each other. 
We may have more possibilities if we make control flow explicit: if we locate the logic for passing data from one calculation unit to another outside of those calculation units. Read this [article](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) for more thoughts. We should note that the query execution pipeline creates temporary data at each step. We try to keep block size small enough so that temporary data fits in the CPU cache. With that assumption, writing and reading temporary data is almost free in comparison with other calculations. We could consider an alternative, which is to fuse many operations in the pipeline together, to make the pipeline as short as possible and remove much of the temporary data. This could be an advantage, but it also has drawbacks. For example, a split pipeline makes it easy to implement caching intermediate data, stealing intermediate data from similar queries running at the same time, and merging pipelines for similar queries. ## Formats -Data formats are implemented with block streams. There are "presentational" formats only suitable for output of data to the client, such as `Pretty` format, which provides only `IBlockOutputStream`. And there are input/output formats, such as `TabSeparated` or `JSONEachRow`. +Data formats are implemented with block streams. There are "presentational" formats only suitable for the output of data to the client, such as `Pretty` format, which provides only `IBlockOutputStream`. And there are input/output formats, such as `TabSeparated` or `JSONEachRow`. -There are also row streams: `IRowInputStream` and `IRowOutputStream`. They allow you to pull/push data by individual rows, not by blocks. And they are only needed to simplify implementation of row-oriented formats. The wrappers `BlockInputStreamFromRowInputStream` and `BlockOutputStreamFromRowOutputStream` allow you to convert row-oriented streams to regular block-oriented streams. +There are also row streams: `IRowInputStream` and `IRowOutputStream`. They allow you to pull/push data by individual rows, not by blocks. And they are only needed to simplify the implementation of row-oriented formats. The wrappers `BlockInputStreamFromRowInputStream` and `BlockOutputStreamFromRowOutputStream` allow you to convert row-oriented streams to regular block-oriented streams. ## I/O @@ -97,7 +97,7 @@ There are also `TableFunction`s. These are functions that return a temporary `IS To get a quick idea of how to implement your own table engine, look at something simple, like `StorageMemory` or `StorageTinyLog`. -> As the result of the `read` method, `IStorage` returns `QueryProcessingStage` – information about what parts of the query were already calculated inside storage. Currently we have only very coarse granularity for that information. There is no way for the storage to say "I have already processed this part of the expression in WHERE, for this range of data". We need to work on that. +> As the result of the `read` method, `IStorage` returns `QueryProcessingStage` – information about what parts of the query were already calculated inside storage. Currently, we have only very coarse granularity for that information. There is no way for the storage to say "I have already processed this part of the expression in WHERE, for this range of data". We need to work on that. ## Parsers @@ -107,7 +107,7 @@ A query is parsed by a hand-written recursive descent parser. 
For example, `Pars ## Interpreters -Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery`and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to; and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time. +Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time. `InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations or query. @@ -123,17 +123,17 @@ ClickHouse has strong typing, so implicit type conversion doesn't occur. If a fu Implementing a function may be slightly inconvenient because a function explicitly dispatches supported data types and supported `IColumns`. For example, the `plus` function has code generated by instantiation of a C++ template for each combination of numeric types, and for constant or non-constant left and right arguments. -> This is a nice place to implement runtime code generation to avoid template code bloat. Also, it will make it possible to add fused functions like fused multiply-add, or to make multiple comparisons in one loop iteration. +> This is a nice place to implement runtime code generation to avoid template code bloat. Also, it will make it possible to add fused functions like fused multiply-add or to make multiple comparisons in one loop iteration. -Due to vectorized query execution, functions are not short-circuit. For example, if you write `WHERE f(x) AND g(y)`, both sides will be calculated, even for rows, when `f(x)` is zero (except when `f(x)` is a zero constant expression). But if selectivity of the `f(x)` condition is high, and calculation of `f(x)` is much cheaper than `g(y)`, it's better to implement multi-pass calculation: first calculate `f(x)`, then filter columns by the result, and then calculate `g(y)` only for smaller, filtered chunks of data. +Due to vectorized query execution, functions are not short-circuited. For example, if you write `WHERE f(x) AND g(y)`, both sides will be calculated, even for rows, when `f(x)` is zero (except when `f(x)` is a zero constant expression). 
But if the selectivity of the `f(x)` condition is high, and calculation of `f(x)` is much cheaper than `g(y)`, it's better to implement multi-pass calculation: first calculate `f(x)`, then filter columns by the result, and then calculate `g(y)` only for smaller, filtered chunks of data. ## Aggregate Functions -Aggregate functions are stateful functions. They accumulate passed values into some state, and allow you to get results from that state. They are managed with the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just a single `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table and a `HyperLogLog` probabilistic data structure). +Aggregate functions are stateful functions. They accumulate passed values into some state and allow you to get results from that state. They are managed with the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just a single `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table and a `HyperLogLog` probabilistic data structure). To deal with multiple states while executing a high-cardinality `GROUP BY` query, states are allocated in `Arena` (a memory pool), or they could be allocated in any suitable piece of memory. States can have a non-trivial constructor and destructor: for example, complex aggregation states can allocate additional memory themselves. This requires some attention to creating and destroying states and properly passing their ownership, to keep track of who and when will destroy states. -Aggregation states can be serialized and deserialized to pass over the network during distributed query execution or to write them on disk where there is not enough RAM. They can even be stored in a table with the `DataTypeAggregateFunction` to allow incremental aggregation of data. +Aggregation states can be serialized and deserialized to pass over the network during distributed query execution or to write them on the disk where there is not enough RAM. They can even be stored in a table with the `DataTypeAggregateFunction` to allow incremental aggregation of data. > The serialized data format for aggregate function states is not versioned right now. This is ok if aggregate states are only stored temporarily. But we have the `AggregatingMergeTree` table engine for incremental aggregation, and people are already using it in production. This is why we should add support for backward compatibility when changing the serialized format for any aggregate function in the future. @@ -149,15 +149,15 @@ Internally, it is just a basic multithreaded server without coroutines, fibers, The server initializes the `Context` class with the necessary environment for query execution: the list of available databases, users and access rights, settings, clusters, the process list, the query log, and so on. This environment is used by interpreters. -We maintain full backward and forward compatibility for the server TCP protocol: old clients can talk to new servers and new clients can talk to old servers. But we don't want to maintain it eternally, and we are removing support for old versions after about one year. +We maintain full backwards and forward compatibility for the server TCP protocol: old clients can talk to new servers and new clients can talk to old servers. 
But we don't want to maintain it eternally, and we are removing support for old versions after about one year. > For all external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data and it uses custom framing for compressed data. We haven't released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical. ## Distributed Query Execution -Servers in a cluster setup are mostly independent. You can create a `Distributed` table on one or all servers in a cluster. The `Distributed` table does not store data itself – it only provides a "view" to all local tables on multiple nodes of a cluster. When you SELECT from a `Distributed` table, it rewrites that query, chooses remote nodes according to load balancing settings, and sends the query to them. The `Distributed` table requests remote servers to process a query just up to a stage where intermediate results from different servers can be merged. Then it receives the intermediate results and merges them. The distributed table tries to distribute as much work as possible to remote servers, and does not send much intermediate data over the network. +Servers in a cluster setup are mostly independent. You can create a `Distributed` table on one or all servers in a cluster. The `Distributed` table does not store data itself – it only provides a "view" to all local tables on multiple nodes of a cluster. When you SELECT from a `Distributed` table, it rewrites that query, chooses remote nodes according to load balancing settings, and sends the query to them. The `Distributed` table requests remote servers to process a query just up to a stage where intermediate results from different servers can be merged. Then it receives the intermediate results and merges them. The distributed table tries to distribute as much work as possible to remote servers and does not send much intermediate data over the network. -> Things become more complicated when you have subqueries in IN or JOIN clauses and each of them uses a `Distributed` table. We have different strategies for execution of these queries. +> Things become more complicated when you have subqueries in IN or JOIN clauses and each of them uses a `Distributed` table. We have different strategies for the execution of these queries. There is no global query plan for distributed query execution. Each node has its own local query plan for its part of the job. We only have simple one-pass distributed query execution: we send queries for remote nodes and then merge the results. But this is not feasible for difficult queries with high cardinality GROUP BYs or with a large amount of temporary data for JOIN: in such cases, we need to "reshuffle" data between servers, which requires additional coordination. ClickHouse does not support that kind of query execution, and we need to work on it. @@ -165,17 +165,17 @@ There is no global query plan for distributed query execution. Each node has its `MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in "parts". Each part stores data in the primary key order (data is ordered lexicographically by the primary key tuple). All the table columns are stored in separate `column.bin` files in these parts. 
The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the order is defined by the primary key), so when you iterate by many columns, you get values for the corresponding rows. -The primary key itself is "sparse". It doesn't address each single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with "marks," which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory and data for `column.mrk` files is cached. +The primary key itself is "sparse". It doesn't address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with "marks," which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory and data for `column.mrk` files is cached. When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could possibly contain requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of the key in the table at INSERT time. You could have many rows with the same key in a table. -When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. To keep the number of parts relatively low, there are background threads that periodically select some parts and merge them to a single sorted part. That's why it is called `MergeTree`. Of course, merging leads to "write amplification". All parts are immutable: they are only created and deleted, but not modified. When SELECT is run, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts. +When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. 
To keep the number of parts relatively low, there are background threads that periodically select some parts and merge them into a single sorted part. That's why it is called `MergeTree`. Of course, merging leads to "write amplification". All parts are immutable: they are only created and deleted, but not modified. When SELECT is run, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts. `MergeTree` is not an LSM tree because it doesn't contain "memtable" and "log": inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity's sake, and because we are already inserting data in batches in our applications. > MergeTree tables can only have one (primary) index: there aren't any secondary indices. It would be nice to allow multiple physical representations under one logical table, for example, to store data in more than one physical order or even to allow representations with pre-aggregated data along with original data. -There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges will be executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form. +There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as a special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges will be executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form. ## Replication @@ -183,9 +183,9 @@ Replication in ClickHouse is implemented on a per-table basis. You could have so Replication is implemented in the `ReplicatedMergeTree` storage engine. The path in `ZooKeeper` is specified as a parameter for the storage engine. All tables with the same path in `ZooKeeper` become replicas of each other: they synchronize their data and maintain consistency. Replicas can be added and removed dynamically simply by creating or dropping a table. -Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn't support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails. +Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn't support UPDATEs, replication is conflict-free. As there is no quorum acknowledgement of inserts, just-inserted data might be lost if one node fails. -Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. 
Actions are: get part; merge parts; drop partition, etc. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the "get part" action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. To achieve this, one replica is elected as the leader, and that replica initiates merges and writes "merge parts" actions to the log. +Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, etc. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the "get the part" action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. To achieve this, one replica is elected as the leader, and that replica initiates merges and writes "merge parts" actions to the log. Replication is physical: only compressed parts are transferred between nodes, not queries. To lower the network cost (to avoid network amplification), merges are processed on each replica independently in most cases. Large merged parts are sent over the network only in cases of significant replication lag. diff --git a/docs/en/development/browse_code.md b/docs/en/development/browse_code.md index 0a99f5128cb..187010ef15a 100644 --- a/docs/en/development/browse_code.md +++ b/docs/en/development/browse_code.md @@ -2,6 +2,6 @@ You can use **Woboq** online code browser available [here](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. -Also you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. +Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. If you're interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favourite IDE. Vim and Emacs also count. diff --git a/docs/en/development/build.md b/docs/en/development/build.md index ea3f68355ca..01a7b196465 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -63,9 +63,9 @@ This will create the `dbms/programs/clickhouse` executable, which can be used wi # How to Build ClickHouse on Any Linux -The build requires the following componenets: +The build requires the following components: -- Git (is used only to checkout the sources, it's not needed for build) +- Git (is used only to checkout the sources, it's not needed for the build) - CMake 3.10 or newer - Ninja (recommended) or Make - C++ compiler: gcc 9 or clang 8 or newer @@ -110,9 +110,9 @@ make -j $(nproc) ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour. -They are build for stable, prestable and testing releases as long as for every commit to master and for every pull request. +They are built for stable, prestable and testing releases as long as for every commit to master and for every pull request. 
-To find the most fresh build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near commit, and click to the "Details" link right after "ClickHouse Build Check". +To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near commit, and click to the "Details" link right after "ClickHouse Build Check". # How to Build ClickHouse Debian Package diff --git a/docs/en/development/build_cross_osx.md b/docs/en/development/build_cross_osx.md index b8e38b44e1e..010e939f8a9 100644 --- a/docs/en/development/build_cross_osx.md +++ b/docs/en/development/build_cross_osx.md @@ -1,6 +1,6 @@ # How to Build ClickHouse on Linux for Mac OS X -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with another instruction: https://clickhouse.tech/docs/en/development/build_osx/ +This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build_osx.md). The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first. @@ -33,7 +33,7 @@ cd cctools-port/cctools make install ``` -Also, we need to download MacOS X SDK into the working tree. +Also, we need to download macOS X SDK into the working tree. ```bash cd ClickHouse @@ -54,4 +54,4 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin ninja -C build-osx ``` -The resulting binary will have Mach-O executable format and can't be run on Linux. +The resulting binary will have a Mach-O executable format and can't be run on Linux. diff --git a/docs/en/development/developer_instruction.md b/docs/en/development/developer_instruction.md index d636ee7189c..7cf56836d07 100644 --- a/docs/en/development/developer_instruction.md +++ b/docs/en/development/developer_instruction.md @@ -2,11 +2,11 @@ Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. # If you use Windows -If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command line terminal in Ubuntu, please locate a program containing the word "terminal" in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. +If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word "terminal" in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. -# If you use 32-bit system +# If you use a 32-bit system -ClickHouse cannot work or build on 32-bit system. 
You should acquire access to 64-bit system and you can continue reading. +ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading. # Creating a repository on GitHub @@ -17,7 +17,7 @@ You probably already have one, but if you don't, please register at https://gith Create a fork of ClickHouse repository. To do that please click on the "fork" button in the upper right corner at https://github.com/ClickHouse/ClickHouse. It will fork your own copy of ClickHouse/ClickHouse to your account. -Development process consists of first committing the intended changes into your fork of ClickHouse and then creating a "pull request" for these changes to be accepted into the main repository (ClickHouse/ClickHouse). +The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a "pull request" for these changes to be accepted into the main repository (ClickHouse/ClickHouse). To work with git repositories, please install `git`. @@ -28,7 +28,7 @@ sudo apt install git ``` A brief manual on using Git can be found here: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf. -For a detailed manual on Git see: https://git-scm.com/book/ru/v2. +For a detailed manual on Git see https://git-scm.com/book/ru/v2. # Cloning a repository to your development machine @@ -51,7 +51,7 @@ Please note that ClickHouse repository uses `submodules`. That is what the refer git submodule init git submodule update ``` -You can check status with command: `git submodule status`. +You can check the status with the command: `git submodule status`. If you get the following error message: ``` @@ -67,7 +67,7 @@ You can also clone the repository via https protocol: ``` git clone https://github.com/ClickHouse/ClickHouse.git ``` -This however will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command. +This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command. You can also add original ClickHouse repo's address to your local repository to pull updates from there: ``` @@ -92,7 +92,7 @@ git submodule update git submodule update --init ``` -The next commands would help you to reset all submodules to the initial state (!WARING! - any chenges inside will be deleted): +The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted): ``` # Synchronizes submodules' remote URL with .gitmodules @@ -116,7 +116,7 @@ git submodule foreach git submodule foreach git clean -xfd ClickHouse uses CMake and Ninja for building. CMake - a meta-build system that can generate Ninja files (build tasks). -Ninja - a smaller build system with focus on speed used to execute those cmake generated tasks. +Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks. To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`. @@ -162,7 +162,7 @@ cd build ``` You can have several different directories (build_release, build_debug, etc.) for different types of build. -While inside the `build` directory, configure your build by running CMake. 
Before the first run you need to define environment variables that specify compiler (version 9 gcc compiler in this example). +While inside the `build` directory, configure your build by running CMake. Before the first run, you need to define environment variables that specify compiler (version 9 gcc compiler in this example). Linux: ``` @@ -178,7 +178,7 @@ cmake .. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. -For a faster build you can resort to the `debug` build type - a build with no optimizations. For that supply the following parameter `-D CMAKE_BUILD_TYPE=Debug`: +For a faster build, you can resort to the `debug` build type - a build with no optimizations. For that supply the following parameter `-D CMAKE_BUILD_TYPE=Debug`: ``` cmake -D CMAKE_BUILD_TYPE=Debug .. ``` @@ -197,15 +197,15 @@ ninja Full build requires about 30GB of free disk space or 15GB to build the main binaries. -When large amount of RAM is available on build machine you should limit the number of build tasks run in parallel with `-j` param: +When a large amount of RAM is available on build machine you should limit the number of build tasks run in parallel with `-j` param: ``` ninja -j 1 clickhouse-server clickhouse-client ``` -On machines with 4GB of RAM it is recommended to specify 1, for 8GB of RAM `-j 2` is recommended. +On machines with 4GB of RAM, it is recommended to specify 1, for 8GB of RAM `-j 2` is recommended. If you get the message: `ninja: error: loading 'build.ninja': No such file or directory`, it means that generating a build configuration has failed and you need to inspect the message above. -Upon successful start of the building process you'll see the build progress - the number of processed tasks and the total number of tasks. +Upon the successful start of the building process, you'll see the build progress - the number of processed tasks and the total number of tasks. While building messages about protobuf files in libhdfs2 library like `libprotobuf WARNING` may show up. They affect nothing and are safe to be ignored. @@ -223,7 +223,7 @@ To run the server under the current user you need to navigate to `ClickHouse/dbm ../../../build/dbms/programs/clickhouse server ``` -In this case ClickHouse will use config files located in the current directory. You can run `clickhouse server` from any directory specifying the path to a config file as a command line parameter `--config-file`. +In this case, ClickHouse will use config files located in the current directory. You can run `clickhouse server` from any directory specifying the path to a config file as a command-line parameter `--config-file`. To connect to ClickHouse with clickhouse-client in another terminal navigate to `ClickHouse/build/dbms/programs/` and run `clickhouse client`. @@ -232,7 +232,7 @@ If you get `Connection refused` message on Mac OS X or FreeBSD, try specifying h clickhouse client --host 127.0.0.1 ``` -You can replace production version of ClickHouse binary installed in your system with your custom built ClickHouse binary. To do that install ClickHouse on your machine following the instructions from the official website. Next, run the following: +You can replace the production version of ClickHouse binary installed in your system with your custom-built ClickHouse binary. To do that install ClickHouse on your machine following the instructions from the official website. 
Next, run the following: ``` sudo service clickhouse-server stop sudo cp ClickHouse/build/dbms/programs/clickhouse /usr/bin/ @@ -241,7 +241,7 @@ sudo service clickhouse-server start Note that `clickhouse-client`, `clickhouse-server` and others are symlinks to the commonly shared `clickhouse` binary. -You can also run your custom built ClickHouse binary with the config file from the ClickHouse package installed on your system: +You can also run your custom-built ClickHouse binary with the config file from the ClickHouse package installed on your system: ``` sudo service clickhouse-server stop sudo -u clickhouse ClickHouse/build/dbms/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml @@ -250,13 +250,13 @@ sudo -u clickhouse ClickHouse/build/dbms/programs/clickhouse server --config-fil # IDE (Integrated Development Environment) -If you do not know which IDE to use, we recommend that you use CLion. CLion is a commercial software, but it offers 30 day free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X. +If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X. -KDevelop and QTCreator are another great alternatives of an IDE for developing ClickHouse. KDevelop comes in as a very handy IDE although unstable. If KDevelop crashes after a while upon opening project, you should click "Stop All" button as soon as it has opened the list of project's files. After doing so KDevelop should be fine to work with. +KDevelop and QTCreator are other great alternatives of an IDE for developing ClickHouse. KDevelop comes in as a very handy IDE although unstable. If KDevelop crashes after a while upon opening project, you should click "Stop All" button as soon as it has opened the list of project's files. After doing so KDevelop should be fine to work with. -As simple code editors you can use Sublime Text or Visual Studio Code, or Kate (all of which are available on Linux). +As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate (all of which are available on Linux). -Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally CLion will use `make` to run build tasks instead of `ninja`. This is a normal behaviour, just keep that in mind to avoid confusion. +Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion. # Writing Code @@ -272,7 +272,7 @@ List of tasks: https://github.com/ClickHouse/ClickHouse/blob/master/dbms/tests/i # Test Data -Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of development tasks. +Developing ClickHouse often requires loading realistic datasets. 
It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks. ``` sudo apt install wget xz-utils @@ -297,12 +297,12 @@ clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visit # Creating Pull Request -Navigate to your fork repository in GitHub's UI. If you have been developing in a branch, you need to select that branch. There will be a "Pull request" button located on the screen. In essence this means "create a request for accepting my changes into the main repository". +Navigate to your fork repository in GitHub's UI. If you have been developing in a branch, you need to select that branch. There will be a "Pull request" button located on the screen. In essence, this means "create a request for accepting my changes into the main repository". -A pull request can be created even if the work is not completed yet. In this case please put the word "WIP" (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating realease changelogs. +A pull request can be created even if the work is not completed yet. In this case please put the word "WIP" (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs. -Testing will commence as soon as Yandex employees label your PR with a tag "can be tested". The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within a half an hour. And the main set of tests will report itself within an hour. +Testing will commence as soon as Yandex employees label your PR with a tag "can be tested". The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the "Details" link next to "ClickHouse build check" entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). -Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways. +Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways. 
diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index a0a607dc0b2..695d0175685 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse \ No newline at end of file +ClickHouse From 359e71dc0261d3f1ac4cc4db27f19a88e9fbce88 Mon Sep 17 00:00:00 2001 From: "imgbot[bot]" <31301654+imgbot[bot]@users.noreply.github.com> Date: Wed, 18 Mar 2020 17:57:08 +0300 Subject: [PATCH 058/115] [ImgBot] Optimize images (#9732) /website/images/clickhouse-black.svg -- 4.33kb -> 4.33kb (0.02%) Signed-off-by: ImgBotApp Co-authored-by: ImgBotApp --- website/images/clickhouse-black.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index 695d0175685..a0a607dc0b2 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse +ClickHouse \ No newline at end of file From 83c8581e5a8cb9dde1f8ac8b1952c9af1029f0f0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Mar 2020 18:22:29 +0300 Subject: [PATCH 059/115] Update synthetic_hardware_benchmark.xml --- dbms/tests/performance/synthetic_hardware_benchmark.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/performance/synthetic_hardware_benchmark.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml index cb118ca0f5a..fc910077c9f 100644 --- a/dbms/tests/performance/synthetic_hardware_benchmark.xml +++ b/dbms/tests/performance/synthetic_hardware_benchmark.xml @@ -58,7 +58,7 @@ -SELECT number % toUInt32(1e8) AS k, count() FROM numbers_mt(toUInt32(4e8)) GROUP BY k FORMAT Null +SELECT number % toUInt32(1e8) AS k, count() FROM numbers_mt(toUInt32(1e8)) GROUP BY k FORMAT Null From f04c2da1805a96b86b675ca2a8572f3e6c8f9b02 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Mar 2020 18:31:03 +0300 Subject: [PATCH 060/115] Update arithmetic.xml --- dbms/tests/performance/arithmetic.xml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/tests/performance/arithmetic.xml b/dbms/tests/performance/arithmetic.xml index 30b7707a2f2..c62d9010882 100644 --- a/dbms/tests/performance/arithmetic.xml +++ b/dbms/tests/performance/arithmetic.xml @@ -63,14 +63,14 @@ SELECT count() FROM nums WHERE NOT ignore( - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})), - identity({op}({arg}, {arg})) + {op}({arg}, {arg}), + {op}({arg}, {arg} + 1), + {op}({arg}, {arg} + 2), + {op}({arg}, {arg} + 3), + {op}({arg}, {arg} + 4), + {op}({arg}, {arg} + 5), + {op}({arg}, {arg} + 6), + {op}({arg}, {arg} + 7) ) DROP TABLE nums From f6257bf5b68647dd8601478ee5ea6c871bf4a02e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 18 Mar 2020 18:54:16 +0300 Subject: [PATCH 061/115] Temporary return wkhtmltopdf logging --- docs/tools/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index e395b56afb1..fdef579f41b 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -186,7 +186,7 @@ def build_single_page_version(lang, args, cfg): create_pdf_command = ['wkhtmltopdf', '--print-media-type', single_page_index_html, single_page_pdf] logging.debug(' 
'.join(create_pdf_command)) with open(os.devnull, 'w') as devnull: - subprocess.check_call(' '.join(create_pdf_command), shell=True, stderr=devnull) + subprocess.check_call(' '.join(create_pdf_command), shell=True) with util.temp_dir() as test_dir: cfg.load_dict({ From 0f59f6d21d351bb1b4ab3da8bc00f7b4ff50c4b8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 18 Mar 2020 19:02:15 +0300 Subject: [PATCH 062/115] trigger checks --- docs/en/roadmap.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/roadmap.md b/docs/en/roadmap.md index 613968b9c93..0001e2a0529 100644 --- a/docs/en/roadmap.md +++ b/docs/en/roadmap.md @@ -9,4 +9,5 @@ - Integration with external authentication services - Resource pools for more precise distribution of cluster capacity between users + [Original article](https://clickhouse.tech/docs/en/roadmap/) From 20211451ae615676010cf29446c145d4652e2f76 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 Mar 2020 19:47:33 +0300 Subject: [PATCH 063/115] Fix backquoting in dictionary ddl --- .../getDictionaryConfigurationFromAST.cpp | 7 ++++- .../integration/test_dictionaries_ddl/test.py | 29 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index a1d898b2621..0eb734b18fa 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -34,8 +35,12 @@ using NamesToTypeNames = std::unordered_map; String getUnescapedFieldString(const Field & field) { String string = applyVisitor(FieldVisitorToString(), field); + if (!string.empty() && string.front() == '\'' && string.back() == '\'') - return string.substr(1, string.size() - 2); + string = string.substr(1, string.size() - 2); + + /// Backqouting will be performed on dictionary providers side + boost::replace_all(string, "\\'", "'"); return string; } diff --git a/dbms/tests/integration/test_dictionaries_ddl/test.py b/dbms/tests/integration/test_dictionaries_ddl/test.py index ad50a8c7daf..5f1430a786c 100644 --- a/dbms/tests/integration/test_dictionaries_ddl/test.py +++ b/dbms/tests/integration/test_dictionaries_ddl/test.py @@ -214,3 +214,32 @@ def test_file_dictionary_restrictions(started_cluster): node3.query("SELECT dictGetString('test.restricted_file_dictionary', 'value', toUInt64(1))") except QueryRuntimeException as ex: assert 'is not inside' in str(ex) + + +def test_dictionary_with_where(started_cluster): + mysql_conn = create_mysql_conn("root", "clickhouse", "localhost", 3308) + execute_mysql_query(mysql_conn, "CREATE DATABASE IF NOT EXISTS clickhouse") + execute_mysql_query(mysql_conn, "CREATE TABLE clickhouse.special_table (key_field1 int, value1 text, PRIMARY KEY (key_field1))") + execute_mysql_query(mysql_conn, "INSERT INTO clickhouse.special_table VALUES (1, 'abcabc'), (2, 'qweqwe')") + + node1.query(""" + CREATE DICTIONARY default.special_dict ( + key_field1 Int32, + value1 String DEFAULT 'xxx' + ) + PRIMARY KEY key_field1 + SOURCE(MYSQL( + USER 'root' + PASSWORD 'clickhouse' + DB 'clickhouse' + TABLE 'special_table' + REPLICA(PRIORITY 1 HOST 'mysql1' PORT 3306) + WHERE 'value1 = \\'qweqwe\\'' + )) + LAYOUT(FLAT()) + LIFETIME(MIN 1 MAX 3) + """) + + node1.query("SYSTEM RELOAD DICTIONARY default.special_dict") + + assert node1.query("SELECT dictGetString('default.special_dict', 'value1', toUInt64(2))") == 'qweqwe\n' 
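For reference, the dictionary definition exercised by the integration test above can be written directly in clickhouse-client. This is only a sketch based on that test (same MySQL table, credentials and replica settings, which are assumptions of the test environment, not a recommendation), and the note about the generated configuration describes the intended effect of the unescaping added to `getUnescapedFieldString`, not a captured output:

```sql
-- Sketch only: dictionary DDL mirroring the integration test above, as typed in clickhouse-client.
CREATE DICTIONARY default.special_dict
(
    key_field1 Int32,
    value1 String DEFAULT 'xxx'
)
PRIMARY KEY key_field1
SOURCE(MYSQL(
    USER 'root'
    PASSWORD 'clickhouse'
    DB 'clickhouse'
    TABLE 'special_table'
    REPLICA(PRIORITY 1 HOST 'mysql1' PORT 3306)
    WHERE 'value1 = \'qweqwe\''   -- stored as an escaped string literal in the DDL
))
LAYOUT(FLAT())
LIFETIME(MIN 1 MAX 3);

-- Intended effect of the unescaping: the clause should reach the MySQL source as
--   value1 = 'qweqwe'
-- (outer quotes stripped, \' turned back into '), so the source can build its own
-- query without double escaping.
SELECT dictGetString('default.special_dict', 'value1', toUInt64(2));  -- expected: qweqwe
```

The expected `qweqwe` result matches the assertion in the test; the escaped form `\'` is simply how the DDL parser stores the string literal, and removing the backslashes before the clause is handed to the dictionary source is what keeps the resulting MySQL query valid.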
From bceb246d8e1eadca78acf7fc6a9896b92d7576fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 05:02:24 +0300 Subject: [PATCH 064/115] Added most of bugprone checks --- .clang-tidy | 32 ++++++ dbms/programs/obfuscator/Obfuscator.cpp | 2 +- dbms/programs/odbc-bridge/ODBCBridge.cpp | 4 +- dbms/programs/server/Server.cpp | 2 +- dbms/src/Access/AccessRights.cpp | 3 + dbms/src/Access/QuotaContext.cpp | 3 + ...ggregateFunctionSimpleLinearRegression.cpp | 2 +- dbms/src/Columns/ColumnAggregateFunction.cpp | 2 +- dbms/src/Common/ThreadPool.cpp | 2 +- dbms/src/Common/tests/auto_array.cpp | 4 +- dbms/src/Common/tests/pod_array.cpp | 4 +- dbms/src/Core/SettingsCollection.cpp | 4 +- dbms/src/Dictionaries/CacheDictionary.cpp | 4 +- ...acheDictionary_createAttributeWithType.cpp | 4 +- dbms/src/Dictionaries/HashedDictionary.cpp | 2 +- .../Dictionaries/RedisDictionarySource.cpp | 2 +- dbms/src/Formats/ProtobufReader.cpp | 99 ++++++++++--------- dbms/src/Functions/GeoUtils.cpp | 10 +- dbms/src/Functions/array/arrayUniq.cpp | 2 +- dbms/src/Functions/trim.cpp | 4 +- dbms/src/IO/parseDateTimeBestEffort.cpp | 4 +- dbms/src/Interpreters/Aggregator.cpp | 4 +- .../InterpreterKillQueryQuery.cpp | 2 +- dbms/src/Interpreters/SetVariants.cpp | 6 +- .../Interpreters/tests/hash_map_string_2.cpp | 6 +- .../Interpreters/tests/hash_map_string_3.cpp | 4 +- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 2 +- dbms/src/Parsers/CommonParsers.cpp | 2 +- dbms/src/Processors/ForkProcessor.cpp | 2 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 4 +- .../src/Storages/LiveView/StorageLiveView.cpp | 2 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 6 +- .../Storages/MergeTree/MergeTreeSettings.cpp | 2 +- dbms/src/Storages/StorageInMemoryMetadata.cpp | 4 +- 35 files changed, 147 insertions(+), 96 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 49773ad31c9..7dd495237a7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -57,6 +57,38 @@ Checks: '-*, bugprone-inaccurate-erase, bugprone-incorrect-roundings, bugprone-infinite-loop, + bugprone-integer-division, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-operator-in-strlen-in-alloc, + bugprone-misplaced-pointer-artithmetic-in-alloc, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-parent-virtual-call, + bugprone-posix-return, + bugprone-reserved-identifier, + bugprone-signed-char-misuse, + bugprone-sizeof-container, + bugprone-sizeof-expression, + bugprone-string-constructor, + bugprone-string-integer-assignment, + bugprone-string-literal-with-embedded-nul, + bugprone-suspicious-enum-usage, + bugprone-suspicious-include, + bugprone-suspicious-memset-usage, + bugprone-suspicious-missing-comma, + bugprone-suspicious-string-compare, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-throw-keyword-missing, + bugprone-too-small-loop-variable, + bugprone-undefined-memory-manipulation, + bugprone-unhandled-self-assignment, + bugprone-unused-raii, + bugprone-unused-return-value, + bugprone-use-after-move, + bugprone-virtual-near-miss, boost-use-to-string, ' diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index b67041f44d6..a92a0d03287 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -677,7 +677,7 @@ public: if (!histogram.total) continue; - double average = histogram.total / 
histogram.buckets.size(); + double average = double(histogram.total) / histogram.buckets.size(); UInt64 new_total = 0; for (auto & bucket : histogram.buckets) diff --git a/dbms/programs/odbc-bridge/ODBCBridge.cpp b/dbms/programs/odbc-bridge/ODBCBridge.cpp index 565ee5602ca..76949cfa483 100644 --- a/dbms/programs/odbc-bridge/ODBCBridge.cpp +++ b/dbms/programs/odbc-bridge/ODBCBridge.cpp @@ -111,7 +111,7 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options) .binding("help") .callback(Poco::Util::OptionCallback(this, &Me::handleHelp))); - ServerApplication::defineOptions(options); /// Don't need complex BaseDaemon's .xml config + ServerApplication::defineOptions(options); // NOLINT Don't need complex BaseDaemon's .xml config } void ODBCBridge::initialize(Application & self) @@ -138,7 +138,7 @@ void ODBCBridge::initialize(Application & self) initializeTerminationAndSignalProcessing(); - ServerApplication::initialize(self); + ServerApplication::initialize(self); // NOLINT } void ODBCBridge::uninitialize() diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 81c2de8ce3a..aaf19888f5e 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -175,7 +175,7 @@ int Server::run() std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } - return Application::run(); + return Application::run(); // NOLINT } void Server::initialize(Poco::Util::Application & self) diff --git a/dbms/src/Access/AccessRights.cpp b/dbms/src/Access/AccessRights.cpp index 4f92d8c31c9..80de185ed8f 100644 --- a/dbms/src/Access/AccessRights.cpp +++ b/dbms/src/Access/AccessRights.cpp @@ -75,6 +75,9 @@ public: Node & operator =(const Node & src) { + if (this == &src) + return *this; + node_name = src.node_name; level = src.level; inherited_access = src.inherited_access; diff --git a/dbms/src/Access/QuotaContext.cpp b/dbms/src/Access/QuotaContext.cpp index 815d9440eaa..a48c41dc419 100644 --- a/dbms/src/Access/QuotaContext.cpp +++ b/dbms/src/Access/QuotaContext.cpp @@ -135,6 +135,9 @@ struct QuotaContext::Impl QuotaContext::Interval & QuotaContext::Interval::operator =(const Interval & src) { + if (this == &src) + return *this; + randomize_interval = src.randomize_interval; duration = src.duration; end_of_interval.store(src.end_of_interval.load()); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 64f37cd2e14..46c9402c36e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -56,7 +56,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression( FOR_LEASTSQR_TYPES_2(M, Float64) #define DISPATCH(T1, T2) \ if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \ - return std::make_shared>( \ + return std::make_shared>( /* NOLINT */ \ arguments, \ params \ ); diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 7ea2a3f9dfe..ce07acd1c0d 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -501,7 +501,7 @@ MutableColumns ColumnAggregateFunction::scatter(IColumn::ColumnIndex num_columns size_t num_rows = size(); { - size_t reserve_size = num_rows / num_columns * 1.1; /// 1.1 is just a guess. Better to use n-sigma rule. 
+ size_t reserve_size = double(num_rows) / num_columns * 1.1; /// 1.1 is just a guess. Better to use n-sigma rule. if (reserve_size > 1) for (auto & column : columns) diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index c1cad465ed2..7334188952c 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -225,7 +225,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ { std::unique_lock lock(mutex); if (!first_exception) - first_exception = std::current_exception(); + first_exception = std::current_exception(); // NOLINT shutdown = true; --scheduled_jobs; } diff --git a/dbms/src/Common/tests/auto_array.cpp b/dbms/src/Common/tests/auto_array.cpp index bbb533b65e8..3dc490796fa 100644 --- a/dbms/src/Common/tests/auto_array.cpp +++ b/dbms/src/Common/tests/auto_array.cpp @@ -43,7 +43,7 @@ int main(int argc, char ** argv) Arr arr2 = std::move(arr); - std::cerr << arr.size() << ", " << arr2.size() << std::endl; + std::cerr << arr.size() << ", " << arr2.size() << std::endl; // NOLINT for (auto & elem : arr2) std::cerr << elem << std::endl; @@ -182,7 +182,7 @@ int main(int argc, char ** argv) } arr2 = std::move(arr1); - arr1.resize(n); + arr1.resize(n); // NOLINT std::cerr << "arr1.size(): " << arr1.size() << ", arr2.size(): " << arr2.size() << std::endl diff --git a/dbms/src/Common/tests/pod_array.cpp b/dbms/src/Common/tests/pod_array.cpp index 2a3093b3de7..de15b485411 100644 --- a/dbms/src/Common/tests/pod_array.cpp +++ b/dbms/src/Common/tests/pod_array.cpp @@ -409,7 +409,7 @@ static void test3() Array arr2{std::move(arr)}; - ASSERT_CHECK((arr.empty()), res); + ASSERT_CHECK((arr.empty()), res); // NOLINT ASSERT_CHECK((arr2.size() == 3), res); ASSERT_CHECK((arr2[0] == 1), res); @@ -428,7 +428,7 @@ static void test3() Array arr2{std::move(arr)}; - ASSERT_CHECK((arr.empty()), res); + ASSERT_CHECK((arr.empty()), res); // NOLINT ASSERT_CHECK((arr2.size() == 5), res); ASSERT_CHECK((arr2[0] == 1), res); diff --git a/dbms/src/Core/SettingsCollection.cpp b/dbms/src/Core/SettingsCollection.cpp index b830c35b81c..d45c082eb0b 100644 --- a/dbms/src/Core/SettingsCollection.cpp +++ b/dbms/src/Core/SettingsCollection.cpp @@ -465,7 +465,7 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat) case static_cast(EnumType::NAME): return IO_NAME; #define IMPLEMENT_SETTING_ENUM_FROM_STRING_HELPER_(NAME, IO_NAME) \ - if (s == IO_NAME) \ + if (s == (IO_NAME)) \ { \ set(EnumType::NAME); \ return; \ @@ -474,7 +474,7 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat) #define IMPLEMENT_SETTING_ENUM_CONCAT_NAMES_HELPER_(NAME, IO_NAME) \ if (!all_io_names.empty()) \ all_io_names += ", "; \ - all_io_names += String("'") + IO_NAME + "'"; + all_io_names += String("'") + (IO_NAME) + "'"; #define LOAD_BALANCING_LIST_OF_NAMES(M) \ diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 2294c99c111..36a8c704f4f 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -447,8 +447,8 @@ CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ + attr.null_values = TYPE(null_value.get>()); /* NOLINT */ \ + attr.arrays = std::make_unique>(size); /* NOLINT */ \ bytes_allocated += size * sizeof(TYPE); \ break; DISPATCH(UInt8) diff --git 
a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp index e15a6fb3014..ba9f8d014fd 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -11,8 +11,8 @@ ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ + attr.null_values = TYPE(null_value.get>()); /* NOLINT */ \ + attr.arrays = std::make_unique>(size); /* NOLINT */ \ bytes_allocated += size * sizeof(TYPE); \ break; DISPATCH(UInt8) diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index 722a6e3584c..2bdd33a3d2e 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -446,7 +446,7 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute) /** TODO: more accurate calculation */ bytes_allocated += sizeof(CollectionType); bytes_allocated += bucket_count; - bytes_allocated += map_ref->size() * sizeof(Key) * sizeof(T); + bytes_allocated += map_ref->size() * (sizeof(Key) + sizeof(T)); } } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index c51e5cdadd4..3c5aaf4bb6b 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -183,7 +183,7 @@ namespace DB /// Do not store more than max_block_size values for one request. if (primary_with_secondary.size() == max_block_size + 1) { - hkeys.add(std::move(primary_with_secondary)); + hkeys.add(primary_with_secondary); primary_with_secondary.clear(); primary_with_secondary.addRedisType(key); } diff --git a/dbms/src/Formats/ProtobufReader.cpp b/dbms/src/Formats/ProtobufReader.cpp index 3874ec3e447..5426e8fac62 100644 --- a/dbms/src/Formats/ProtobufReader.cpp +++ b/dbms/src/Formats/ProtobufReader.cpp @@ -273,30 +273,35 @@ UInt64 ProtobufReader::SimpleReader::continueReadingVarint(UInt64 first_byte) char c; #define PROTOBUF_READER_READ_VARINT_BYTE(byteNo) \ - in.readStrict(c); \ - ++cursor; \ - if constexpr (byteNo < 10) \ + do \ { \ - result |= static_cast(static_cast(c)) << (7 * (byteNo - 1)); \ - if (likely(!(c & 0x80))) \ - return result; \ - } \ - else \ - { \ - if (likely(c == 1)) \ - return result; \ - } \ - if constexpr (byteNo < 9) \ - result &= ~(static_cast(0x80) << (7 * (byteNo - 1))); - PROTOBUF_READER_READ_VARINT_BYTE(2) - PROTOBUF_READER_READ_VARINT_BYTE(3) - PROTOBUF_READER_READ_VARINT_BYTE(4) - PROTOBUF_READER_READ_VARINT_BYTE(5) - PROTOBUF_READER_READ_VARINT_BYTE(6) - PROTOBUF_READER_READ_VARINT_BYTE(7) - PROTOBUF_READER_READ_VARINT_BYTE(8) - PROTOBUF_READER_READ_VARINT_BYTE(9) - PROTOBUF_READER_READ_VARINT_BYTE(10) + in.readStrict(c); \ + ++cursor; \ + if constexpr ((byteNo) < 10) \ + { \ + result |= static_cast(static_cast(c)) << (7 * ((byteNo) - 1)); \ + if (likely(!(c & 0x80))) \ + return result; \ + } \ + else \ + { \ + if (likely(c == 1)) \ + return result; \ + } \ + if constexpr ((byteNo) < 9) \ + result &= ~(static_cast(0x80) << (7 * ((byteNo) - 1))); \ + } while (false) + + PROTOBUF_READER_READ_VARINT_BYTE(2); + PROTOBUF_READER_READ_VARINT_BYTE(3); + PROTOBUF_READER_READ_VARINT_BYTE(4); + PROTOBUF_READER_READ_VARINT_BYTE(5); + 
PROTOBUF_READER_READ_VARINT_BYTE(6); + PROTOBUF_READER_READ_VARINT_BYTE(7); + PROTOBUF_READER_READ_VARINT_BYTE(8); + PROTOBUF_READER_READ_VARINT_BYTE(9); + PROTOBUF_READER_READ_VARINT_BYTE(10); + #undef PROTOBUF_READER_READ_VARINT_BYTE throwUnknownFormat(); @@ -307,28 +312,32 @@ void ProtobufReader::SimpleReader::ignoreVarint() char c; #define PROTOBUF_READER_IGNORE_VARINT_BYTE(byteNo) \ - in.readStrict(c); \ - ++cursor; \ - if constexpr (byteNo < 10) \ + do \ { \ - if (likely(!(c & 0x80))) \ - return; \ - } \ - else \ - { \ - if (likely(c == 1)) \ - return; \ - } - PROTOBUF_READER_IGNORE_VARINT_BYTE(1) - PROTOBUF_READER_IGNORE_VARINT_BYTE(2) - PROTOBUF_READER_IGNORE_VARINT_BYTE(3) - PROTOBUF_READER_IGNORE_VARINT_BYTE(4) - PROTOBUF_READER_IGNORE_VARINT_BYTE(5) - PROTOBUF_READER_IGNORE_VARINT_BYTE(6) - PROTOBUF_READER_IGNORE_VARINT_BYTE(7) - PROTOBUF_READER_IGNORE_VARINT_BYTE(8) - PROTOBUF_READER_IGNORE_VARINT_BYTE(9) - PROTOBUF_READER_IGNORE_VARINT_BYTE(10) + in.readStrict(c); \ + ++cursor; \ + if constexpr ((byteNo) < 10) \ + { \ + if (likely(!(c & 0x80))) \ + return; \ + } \ + else \ + { \ + if (likely(c == 1)) \ + return; \ + } \ + } while (false) + + PROTOBUF_READER_IGNORE_VARINT_BYTE(1); + PROTOBUF_READER_IGNORE_VARINT_BYTE(2); + PROTOBUF_READER_IGNORE_VARINT_BYTE(3); + PROTOBUF_READER_IGNORE_VARINT_BYTE(4); + PROTOBUF_READER_IGNORE_VARINT_BYTE(5); + PROTOBUF_READER_IGNORE_VARINT_BYTE(6); + PROTOBUF_READER_IGNORE_VARINT_BYTE(7); + PROTOBUF_READER_IGNORE_VARINT_BYTE(8); + PROTOBUF_READER_IGNORE_VARINT_BYTE(9); + PROTOBUF_READER_IGNORE_VARINT_BYTE(10); #undef PROTOBUF_READER_IGNORE_VARINT_BYTE throwUnknownFormat(); @@ -846,7 +855,7 @@ private: std::unique_ptr ProtobufReader::createConverter( \ const google::protobuf::FieldDescriptor * field) \ { \ - return std::make_unique>(simple_reader, field); \ + return std::make_unique>(simple_reader, field); /* NOLINT */ \ } PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int64); PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int64); diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index 488a102e208..bbd942a9b0d 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -132,17 +132,17 @@ inline std::tuple split(const Encoded & combined, uint8_t prec lat.fill(0); lon.fill(0); - uint8_t i = 0; + size_t i = 0; for (; i < precision * BITS_PER_SYMBOL - 1; i += 2) { // longitude is even bits - lon[i/2] = combined[i]; - lat[i/2] = combined[i + 1]; + lon[i / 2] = combined[i]; + lat[i / 2] = combined[i + 1]; } // precision is even, read the last bit as lat. 
if (precision & 0x1) { - lon[i/2] = combined[precision * BITS_PER_SYMBOL - 1]; + lon[i / 2] = combined[precision * BITS_PER_SYMBOL - 1]; } return std::tie(lon, lat); @@ -152,7 +152,7 @@ inline void base32Encode(const Encoded & binary, uint8_t precision, char * out) { extern const char geohash_base32_encode_lookup_table[32]; - for (uint8_t i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL) + for (size_t i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL) { uint8_t v = binary[i]; v <<= 1; diff --git a/dbms/src/Functions/array/arrayUniq.cpp b/dbms/src/Functions/array/arrayUniq.cpp index d5aedb20883..d94efc47970 100644 --- a/dbms/src/Functions/array/arrayUniq.cpp +++ b/dbms/src/Functions/array/arrayUniq.cpp @@ -214,7 +214,7 @@ void FunctionArrayUniq::executeMethodImpl( for (ColumnArray::Offset j = prev_off; j < off; ++j) { if constexpr (has_null_map) - { + { // NOLINT if ((*null_map)[j]) { found_null = true; diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index 46f69530005..f674afbd310 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -79,14 +79,14 @@ private: const char * char_end = char_data + size; if constexpr (mode::trim_left) - { + { // NOLINT const char * found = find_first_not_symbols<' '>(char_data, char_end); size_t num_chars = found - char_data; char_data += num_chars; } if constexpr (mode::trim_right) - { + { // NOLINT const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end); if (found) char_end = found + 1; diff --git a/dbms/src/IO/parseDateTimeBestEffort.cpp b/dbms/src/IO/parseDateTimeBestEffort.cpp index 24d05f73aa0..6e747b13b3f 100644 --- a/dbms/src/IO/parseDateTimeBestEffort.cpp +++ b/dbms/src/IO/parseDateTimeBestEffort.cpp @@ -68,7 +68,7 @@ inline void readDecimalNumber(T & res, const char * src) template inline void readDecimalNumber(T & res, size_t num_digits, const char * src) { -#define READ_DECIMAL_NUMBER(N) res *= common::exp10_i32(N); readDecimalNumber(res, src); src += N; num_digits -= N; break +#define READ_DECIMAL_NUMBER(N) do { res *= common::exp10_i32(N); readDecimalNumber(res, src); src += (N); num_digits -= (N); } while (false) while (num_digits) { @@ -77,7 +77,7 @@ inline void readDecimalNumber(T & res, size_t num_digits, const char * src) case 3: READ_DECIMAL_NUMBER(3); break; case 2: READ_DECIMAL_NUMBER(2); break; case 1: READ_DECIMAL_NUMBER(1); break; - default: READ_DECIMAL_NUMBER(4); + default: READ_DECIMAL_NUMBER(4); break; } } #undef DECIMAL_NUMBER_CASE diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 0ab4949371b..1a40b7cefc3 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -82,8 +82,8 @@ void AggregatedDataVariants::convertToTwoLevel() { #define M(NAME) \ case Type::NAME: \ - NAME ## _two_level = std::make_unique(*NAME); \ - NAME.reset(); \ + NAME ## _two_level = std::make_unique(*(NAME)); \ + (NAME).reset(); \ type = Type::NAME ## _two_level; \ break; diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index f384e005e3c..81a093f4eae 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -102,7 +102,7 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce res.emplace_back(std::move(query_id), std::move(query_user), i, false); } - if (res.empty() && !query_user.empty()) + if (res.empty() && 
!query_user.empty()) // NOLINT throw Exception("User " + my_client.current_user + " attempts to kill query created by " + query_user, ErrorCodes::ACCESS_DENIED); return res; diff --git a/dbms/src/Interpreters/SetVariants.cpp b/dbms/src/Interpreters/SetVariants.cpp index 56f2ff04230..52f54d2442a 100644 --- a/dbms/src/Interpreters/SetVariants.cpp +++ b/dbms/src/Interpreters/SetVariants.cpp @@ -23,7 +23,7 @@ void SetVariantsTemplate::init(Type type_) case Type::EMPTY: break; #define M(NAME) \ - case Type::NAME: NAME = std::make_unique(); break; + case Type::NAME: (NAME) = std::make_unique(); break; APPLY_FOR_SET_VARIANTS(M) #undef M } @@ -37,7 +37,7 @@ size_t SetVariantsTemplate::getTotalRowCount() const case Type::EMPTY: return 0; #define M(NAME) \ - case Type::NAME: return NAME->data.size(); + case Type::NAME: return (NAME)->data.size(); APPLY_FOR_SET_VARIANTS(M) #undef M } @@ -53,7 +53,7 @@ size_t SetVariantsTemplate::getTotalByteCount() const case Type::EMPTY: return 0; #define M(NAME) \ - case Type::NAME: return NAME->data.getBufferSizeInBytes(); + case Type::NAME: return (NAME)->data.getBufferSizeInBytes(); APPLY_FOR_SET_VARIANTS(M) #undef M } diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp index 32b723c1187..8e13ee46e6d 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp @@ -54,16 +54,16 @@ struct STRUCT : public StringRef {}; \ namespace ZeroTraits \ { \ template <> \ - inline bool check(STRUCT x) { return 0 == x.size; } \ + inline bool check(STRUCT x) { return 0 == x.size; } /* NOLINT */ \ \ template <> \ - inline void set(STRUCT & x) { x.size = 0; } \ + inline void set(STRUCT & x) { x.size = 0; } /* NOLINT */ \ } \ \ template <> \ struct DefaultHash \ { \ - size_t operator() (STRUCT x) const \ + size_t operator() (STRUCT x) const /* NOLINT */ \ { \ return CityHash_v1_0_2::CityHash64(x.data, x.size); \ } \ diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index 62ed0584d3f..cc21129a6a6 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -57,10 +57,10 @@ struct STRUCT : public StringRef {}; \ namespace ZeroTraits \ { \ template <> \ - inline bool check(STRUCT x) { return nullptr == x.data; } \ + inline bool check(STRUCT x) { return nullptr == x.data; } /* NOLINT */ \ \ template <> \ - inline void set(STRUCT & x) { x.data = nullptr; } \ + inline void set(STRUCT & x) { x.data = nullptr; } /* NOLINT */ \ } \ \ template <> \ diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index b085f5a28ae..60cb0475be7 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -11,7 +11,7 @@ do \ { \ if (member) \ { \ - res->member = member->clone(); \ + res->member = (member)->clone(); \ res->children.push_back(res->member); \ } \ } \ diff --git a/dbms/src/Parsers/CommonParsers.cpp b/dbms/src/Parsers/CommonParsers.cpp index ddbf1b17966..47868f5df48 100644 --- a/dbms/src/Parsers/CommonParsers.cpp +++ b/dbms/src/Parsers/CommonParsers.cpp @@ -50,7 +50,7 @@ bool ParserKeyword::parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected) if (word_length != pos->size()) return false; - if (strncasecmp(pos->begin, current_word, word_length)) + if (0 != strncasecmp(pos->begin, current_word, word_length)) return false; ++pos; diff --git 
a/dbms/src/Processors/ForkProcessor.cpp b/dbms/src/Processors/ForkProcessor.cpp index 913e7c2d1c7..7fa21c4236d 100644 --- a/dbms/src/Processors/ForkProcessor.cpp +++ b/dbms/src/Processors/ForkProcessor.cpp @@ -65,7 +65,7 @@ ForkProcessor::Status ForkProcessor::prepare() { ++num_processed_outputs; if (num_processed_outputs == num_active_outputs) - output.push(std::move(data)); /// Can push because no full or unneeded outputs. + output.push(std::move(data)); // NOLINT Can push because no full or unneeded outputs. else output.push(data.clone()); } diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 1c988840abf..002f072f004 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -431,7 +431,7 @@ void registerStorageKafka(StorageFactory & factory) // Check arguments and settings #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME) \ /* One of the four required arguments is not specified */ \ - if (args_count < ARG_NUM && ARG_NUM <= 4 && \ + if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && \ !kafka_settings.PAR_NAME.changed) \ { \ throw Exception( \ @@ -442,7 +442,7 @@ void registerStorageKafka(StorageFactory & factory) /* The same argument is given in two places */ \ if (has_settings && \ kafka_settings.PAR_NAME.changed && \ - args_count >= ARG_NUM) \ + args_count >= (ARG_NUM)) \ { \ throw Exception( \ "The argument №" #ARG_NUM " of storage Kafka " \ diff --git a/dbms/src/Storages/LiveView/StorageLiveView.cpp b/dbms/src/Storages/LiveView/StorageLiveView.cpp index 1faa5e04dc9..29eb896bcee 100644 --- a/dbms/src/Storages/LiveView/StorageLiveView.cpp +++ b/dbms/src/Storages/LiveView/StorageLiveView.cpp @@ -323,7 +323,7 @@ ASTPtr StorageLiveView::getInnerBlocksQuery() /// Rewrite inner query with right aliases for JOIN. /// It cannot be done in constructor or startup() because InterpreterSelectQuery may access table, /// which is not loaded yet during server startup, so we do it lazily - InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze()); + InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze()); // NOLINT auto table_id = getStorageID(); extractDependentTable(inner_blocks_query, global_context, table_id.table_name, inner_subquery); } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index c74ca41f054..e994d254958 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -516,7 +516,7 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo * - in this case `n - 1` elements are added (where `n` is the number of arguments). 
*/ if (i != 0 || element.function == RPNElement::FUNCTION_NOT) - rpn.emplace_back(std::move(element)); + rpn.emplace_back(element); } return; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2b02aad8970..0b87b241d85 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1091,8 +1091,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( { auto merged_processor = std::make_shared(header, pipes.size(), sort_description, max_block_size); - pipes.emplace_back(std::move(pipes), std::move(merged_processor)); - break; + Pipe pipe(std::move(pipes), std::move(merged_processor)); + pipes = Pipes(); + pipes.emplace_back(std::move(pipe)); + return pipes; } case MergeTreeData::MergingParams::Collapsing: diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index 93f5ff20045..5c4113c1565 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -70,7 +70,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) if (std::find_if(changes.begin(), changes.end(), \ [](const SettingChange & c) { return c.name == #NAME; }) \ == changes.end()) \ - changes.push_back(SettingChange{#NAME, NAME.value}); + changes.push_back(SettingChange{#NAME, (NAME).value}); APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(ADD_IF_ABSENT) #undef ADD_IF_ABSENT diff --git a/dbms/src/Storages/StorageInMemoryMetadata.cpp b/dbms/src/Storages/StorageInMemoryMetadata.cpp index 86c6551896a..a05872234de 100644 --- a/dbms/src/Storages/StorageInMemoryMetadata.cpp +++ b/dbms/src/Storages/StorageInMemoryMetadata.cpp @@ -35,11 +35,13 @@ StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemoryMetadata & other) { + if (this == &other) + return *this; + columns = other.columns; indices = other.indices; constraints = other.constraints; - if (other.partition_by_ast) partition_by_ast = other.partition_by_ast->clone(); else From 766187fe35ec1769966ba5142ec85d25e495811e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 06:29:25 +0300 Subject: [PATCH 065/115] Fixed style --- .../AggregateFunctionSimpleLinearRegression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 46c9402c36e..44631d5832a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -56,7 +56,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression( FOR_LEASTSQR_TYPES_2(M, Float64) #define DISPATCH(T1, T2) \ if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \ - return std::make_shared>( /* NOLINT */ \ + return std::make_shared>(/* NOLINT */ \ arguments, \ params \ ); From 711254ec528bc90ffaadc443ce7c4933538735ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 16:11:38 +0300 Subject: [PATCH 066/115] Fixed clang-tidy check --- dbms/src/Common/SymbolIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/SymbolIndex.cpp b/dbms/src/Common/SymbolIndex.cpp index 
a9cdc1fa867..2bd822662c1 100644 --- a/dbms/src/Common/SymbolIndex.cpp +++ b/dbms/src/Common/SymbolIndex.cpp @@ -168,7 +168,7 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, const ElfW(Sym) * elf_sym = reinterpret_cast(correct_address(info->dlpi_addr, it->d_un.d_ptr)); /* Iterate over the symbol table */ - for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; ++sym_index) + for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { /// We are not interested in empty symbols. if (!elf_sym[sym_index].st_size) From 08b75e0c1cfe6277af6a74c0f97699787a075927 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 19:50:08 +0300 Subject: [PATCH 067/115] Fixed clang-tidy check --- dbms/src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index a1c86953dc9..c4339548b19 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -118,7 +118,7 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi if (s >= 256 && s <= std::numeric_limits::max()) { PaddedPODArray> pairs(s); - for (UInt32 i = 0; i < s; ++i) + for (UInt32 i = 0; i < UInt32(s); ++i) pairs[i] = {data[i], i}; RadixSort>::executeLSD(pairs.data(), s); From c20853eeccf864bc266376712ef7f5d2e4866927 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 05:02:24 +0300 Subject: [PATCH 068/115] Added most of bugprone checks --- .clang-tidy | 32 ++++++ dbms/programs/obfuscator/Obfuscator.cpp | 2 +- dbms/programs/odbc-bridge/ODBCBridge.cpp | 4 +- dbms/programs/server/Server.cpp | 2 +- dbms/src/Access/AccessRights.cpp | 3 + dbms/src/Access/QuotaContext.cpp | 3 + ...ggregateFunctionSimpleLinearRegression.cpp | 2 +- dbms/src/Columns/ColumnAggregateFunction.cpp | 2 +- dbms/src/Common/ThreadPool.cpp | 2 +- dbms/src/Common/tests/auto_array.cpp | 4 +- dbms/src/Common/tests/pod_array.cpp | 4 +- dbms/src/Core/SettingsCollection.cpp | 4 +- dbms/src/Dictionaries/CacheDictionary.cpp | 4 +- ...acheDictionary_createAttributeWithType.cpp | 4 +- dbms/src/Dictionaries/HashedDictionary.cpp | 2 +- .../Dictionaries/RedisDictionarySource.cpp | 2 +- dbms/src/Formats/ProtobufReader.cpp | 99 ++++++++++--------- dbms/src/Functions/GeoUtils.cpp | 10 +- dbms/src/Functions/array/arrayUniq.cpp | 2 +- dbms/src/Functions/trim.cpp | 4 +- dbms/src/IO/parseDateTimeBestEffort.cpp | 4 +- dbms/src/Interpreters/Aggregator.cpp | 4 +- .../InterpreterKillQueryQuery.cpp | 2 +- dbms/src/Interpreters/SetVariants.cpp | 6 +- .../Interpreters/tests/hash_map_string_2.cpp | 6 +- .../Interpreters/tests/hash_map_string_3.cpp | 4 +- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 2 +- dbms/src/Parsers/CommonParsers.cpp | 2 +- dbms/src/Processors/ForkProcessor.cpp | 2 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 4 +- .../src/Storages/LiveView/StorageLiveView.cpp | 2 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 6 +- .../Storages/MergeTree/MergeTreeSettings.cpp | 2 +- dbms/src/Storages/StorageInMemoryMetadata.cpp | 4 +- 35 files changed, 147 insertions(+), 96 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 49773ad31c9..7dd495237a7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -57,6 +57,38 @@ Checks: '-*, bugprone-inaccurate-erase, bugprone-incorrect-roundings, bugprone-infinite-loop, + bugprone-integer-division, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + 
bugprone-misplaced-operator-in-strlen-in-alloc, + bugprone-misplaced-pointer-artithmetic-in-alloc, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-parent-virtual-call, + bugprone-posix-return, + bugprone-reserved-identifier, + bugprone-signed-char-misuse, + bugprone-sizeof-container, + bugprone-sizeof-expression, + bugprone-string-constructor, + bugprone-string-integer-assignment, + bugprone-string-literal-with-embedded-nul, + bugprone-suspicious-enum-usage, + bugprone-suspicious-include, + bugprone-suspicious-memset-usage, + bugprone-suspicious-missing-comma, + bugprone-suspicious-string-compare, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-throw-keyword-missing, + bugprone-too-small-loop-variable, + bugprone-undefined-memory-manipulation, + bugprone-unhandled-self-assignment, + bugprone-unused-raii, + bugprone-unused-return-value, + bugprone-use-after-move, + bugprone-virtual-near-miss, boost-use-to-string, ' diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index b67041f44d6..a92a0d03287 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -677,7 +677,7 @@ public: if (!histogram.total) continue; - double average = histogram.total / histogram.buckets.size(); + double average = double(histogram.total) / histogram.buckets.size(); UInt64 new_total = 0; for (auto & bucket : histogram.buckets) diff --git a/dbms/programs/odbc-bridge/ODBCBridge.cpp b/dbms/programs/odbc-bridge/ODBCBridge.cpp index 565ee5602ca..76949cfa483 100644 --- a/dbms/programs/odbc-bridge/ODBCBridge.cpp +++ b/dbms/programs/odbc-bridge/ODBCBridge.cpp @@ -111,7 +111,7 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options) .binding("help") .callback(Poco::Util::OptionCallback(this, &Me::handleHelp))); - ServerApplication::defineOptions(options); /// Don't need complex BaseDaemon's .xml config + ServerApplication::defineOptions(options); // NOLINT Don't need complex BaseDaemon's .xml config } void ODBCBridge::initialize(Application & self) @@ -138,7 +138,7 @@ void ODBCBridge::initialize(Application & self) initializeTerminationAndSignalProcessing(); - ServerApplication::initialize(self); + ServerApplication::initialize(self); // NOLINT } void ODBCBridge::uninitialize() diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 81c2de8ce3a..aaf19888f5e 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -175,7 +175,7 @@ int Server::run() std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; return 0; } - return Application::run(); + return Application::run(); // NOLINT } void Server::initialize(Poco::Util::Application & self) diff --git a/dbms/src/Access/AccessRights.cpp b/dbms/src/Access/AccessRights.cpp index 4f92d8c31c9..80de185ed8f 100644 --- a/dbms/src/Access/AccessRights.cpp +++ b/dbms/src/Access/AccessRights.cpp @@ -75,6 +75,9 @@ public: Node & operator =(const Node & src) { + if (this == &src) + return *this; + node_name = src.node_name; level = src.level; inherited_access = src.inherited_access; diff --git a/dbms/src/Access/QuotaContext.cpp b/dbms/src/Access/QuotaContext.cpp index 815d9440eaa..a48c41dc419 100644 --- a/dbms/src/Access/QuotaContext.cpp +++ b/dbms/src/Access/QuotaContext.cpp @@ -135,6 +135,9 @@ struct QuotaContext::Impl QuotaContext::Interval & QuotaContext::Interval::operator =(const Interval & src) { + if (this == &src) + return *this; + randomize_interval = src.randomize_interval; duration = src.duration; end_of_interval.store(src.end_of_interval.load()); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 64f37cd2e14..46c9402c36e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -56,7 +56,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression( FOR_LEASTSQR_TYPES_2(M, Float64) #define DISPATCH(T1, T2) \ if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \ - return std::make_shared>( \ + return std::make_shared>( /* NOLINT */ \ arguments, \ params \ ); diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 7ea2a3f9dfe..ce07acd1c0d 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -501,7 +501,7 @@ MutableColumns ColumnAggregateFunction::scatter(IColumn::ColumnIndex num_columns size_t num_rows = size(); { - size_t reserve_size = num_rows / num_columns * 1.1; /// 1.1 is just a guess. Better to use n-sigma rule. + size_t reserve_size = double(num_rows) / num_columns * 1.1; /// 1.1 is just a guess. Better to use n-sigma rule. 
if (reserve_size > 1) for (auto & column : columns) diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index c1cad465ed2..7334188952c 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -225,7 +225,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ { std::unique_lock lock(mutex); if (!first_exception) - first_exception = std::current_exception(); + first_exception = std::current_exception(); // NOLINT shutdown = true; --scheduled_jobs; } diff --git a/dbms/src/Common/tests/auto_array.cpp b/dbms/src/Common/tests/auto_array.cpp index bbb533b65e8..3dc490796fa 100644 --- a/dbms/src/Common/tests/auto_array.cpp +++ b/dbms/src/Common/tests/auto_array.cpp @@ -43,7 +43,7 @@ int main(int argc, char ** argv) Arr arr2 = std::move(arr); - std::cerr << arr.size() << ", " << arr2.size() << std::endl; + std::cerr << arr.size() << ", " << arr2.size() << std::endl; // NOLINT for (auto & elem : arr2) std::cerr << elem << std::endl; @@ -182,7 +182,7 @@ int main(int argc, char ** argv) } arr2 = std::move(arr1); - arr1.resize(n); + arr1.resize(n); // NOLINT std::cerr << "arr1.size(): " << arr1.size() << ", arr2.size(): " << arr2.size() << std::endl diff --git a/dbms/src/Common/tests/pod_array.cpp b/dbms/src/Common/tests/pod_array.cpp index 2a3093b3de7..de15b485411 100644 --- a/dbms/src/Common/tests/pod_array.cpp +++ b/dbms/src/Common/tests/pod_array.cpp @@ -409,7 +409,7 @@ static void test3() Array arr2{std::move(arr)}; - ASSERT_CHECK((arr.empty()), res); + ASSERT_CHECK((arr.empty()), res); // NOLINT ASSERT_CHECK((arr2.size() == 3), res); ASSERT_CHECK((arr2[0] == 1), res); @@ -428,7 +428,7 @@ static void test3() Array arr2{std::move(arr)}; - ASSERT_CHECK((arr.empty()), res); + ASSERT_CHECK((arr.empty()), res); // NOLINT ASSERT_CHECK((arr2.size() == 5), res); ASSERT_CHECK((arr2[0] == 1), res); diff --git a/dbms/src/Core/SettingsCollection.cpp b/dbms/src/Core/SettingsCollection.cpp index b830c35b81c..d45c082eb0b 100644 --- a/dbms/src/Core/SettingsCollection.cpp +++ b/dbms/src/Core/SettingsCollection.cpp @@ -465,7 +465,7 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat) case static_cast(EnumType::NAME): return IO_NAME; #define IMPLEMENT_SETTING_ENUM_FROM_STRING_HELPER_(NAME, IO_NAME) \ - if (s == IO_NAME) \ + if (s == (IO_NAME)) \ { \ set(EnumType::NAME); \ return; \ @@ -474,7 +474,7 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat) #define IMPLEMENT_SETTING_ENUM_CONCAT_NAMES_HELPER_(NAME, IO_NAME) \ if (!all_io_names.empty()) \ all_io_names += ", "; \ - all_io_names += String("'") + IO_NAME + "'"; + all_io_names += String("'") + (IO_NAME) + "'"; #define LOAD_BALANCING_LIST_OF_NAMES(M) \ diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 2294c99c111..36a8c704f4f 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -447,8 +447,8 @@ CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ + attr.null_values = TYPE(null_value.get>()); /* NOLINT */ \ + attr.arrays = std::make_unique>(size); /* NOLINT */ \ bytes_allocated += size * sizeof(TYPE); \ break; DISPATCH(UInt8) diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp 
b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp index e15a6fb3014..ba9f8d014fd 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -11,8 +11,8 @@ ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ + attr.null_values = TYPE(null_value.get>()); /* NOLINT */ \ + attr.arrays = std::make_unique>(size); /* NOLINT */ \ bytes_allocated += size * sizeof(TYPE); \ break; DISPATCH(UInt8) diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index 722a6e3584c..2bdd33a3d2e 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -446,7 +446,7 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute) /** TODO: more accurate calculation */ bytes_allocated += sizeof(CollectionType); bytes_allocated += bucket_count; - bytes_allocated += map_ref->size() * sizeof(Key) * sizeof(T); + bytes_allocated += map_ref->size() * (sizeof(Key) + sizeof(T)); } } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index c51e5cdadd4..3c5aaf4bb6b 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -183,7 +183,7 @@ namespace DB /// Do not store more than max_block_size values for one request. if (primary_with_secondary.size() == max_block_size + 1) { - hkeys.add(std::move(primary_with_secondary)); + hkeys.add(primary_with_secondary); primary_with_secondary.clear(); primary_with_secondary.addRedisType(key); } diff --git a/dbms/src/Formats/ProtobufReader.cpp b/dbms/src/Formats/ProtobufReader.cpp index 3874ec3e447..5426e8fac62 100644 --- a/dbms/src/Formats/ProtobufReader.cpp +++ b/dbms/src/Formats/ProtobufReader.cpp @@ -273,30 +273,35 @@ UInt64 ProtobufReader::SimpleReader::continueReadingVarint(UInt64 first_byte) char c; #define PROTOBUF_READER_READ_VARINT_BYTE(byteNo) \ - in.readStrict(c); \ - ++cursor; \ - if constexpr (byteNo < 10) \ + do \ { \ - result |= static_cast(static_cast(c)) << (7 * (byteNo - 1)); \ - if (likely(!(c & 0x80))) \ - return result; \ - } \ - else \ - { \ - if (likely(c == 1)) \ - return result; \ - } \ - if constexpr (byteNo < 9) \ - result &= ~(static_cast(0x80) << (7 * (byteNo - 1))); - PROTOBUF_READER_READ_VARINT_BYTE(2) - PROTOBUF_READER_READ_VARINT_BYTE(3) - PROTOBUF_READER_READ_VARINT_BYTE(4) - PROTOBUF_READER_READ_VARINT_BYTE(5) - PROTOBUF_READER_READ_VARINT_BYTE(6) - PROTOBUF_READER_READ_VARINT_BYTE(7) - PROTOBUF_READER_READ_VARINT_BYTE(8) - PROTOBUF_READER_READ_VARINT_BYTE(9) - PROTOBUF_READER_READ_VARINT_BYTE(10) + in.readStrict(c); \ + ++cursor; \ + if constexpr ((byteNo) < 10) \ + { \ + result |= static_cast(static_cast(c)) << (7 * ((byteNo) - 1)); \ + if (likely(!(c & 0x80))) \ + return result; \ + } \ + else \ + { \ + if (likely(c == 1)) \ + return result; \ + } \ + if constexpr ((byteNo) < 9) \ + result &= ~(static_cast(0x80) << (7 * ((byteNo) - 1))); \ + } while (false) + + PROTOBUF_READER_READ_VARINT_BYTE(2); + PROTOBUF_READER_READ_VARINT_BYTE(3); + PROTOBUF_READER_READ_VARINT_BYTE(4); + PROTOBUF_READER_READ_VARINT_BYTE(5); + PROTOBUF_READER_READ_VARINT_BYTE(6); + PROTOBUF_READER_READ_VARINT_BYTE(7); + 
PROTOBUF_READER_READ_VARINT_BYTE(8); + PROTOBUF_READER_READ_VARINT_BYTE(9); + PROTOBUF_READER_READ_VARINT_BYTE(10); + #undef PROTOBUF_READER_READ_VARINT_BYTE throwUnknownFormat(); @@ -307,28 +312,32 @@ void ProtobufReader::SimpleReader::ignoreVarint() char c; #define PROTOBUF_READER_IGNORE_VARINT_BYTE(byteNo) \ - in.readStrict(c); \ - ++cursor; \ - if constexpr (byteNo < 10) \ + do \ { \ - if (likely(!(c & 0x80))) \ - return; \ - } \ - else \ - { \ - if (likely(c == 1)) \ - return; \ - } - PROTOBUF_READER_IGNORE_VARINT_BYTE(1) - PROTOBUF_READER_IGNORE_VARINT_BYTE(2) - PROTOBUF_READER_IGNORE_VARINT_BYTE(3) - PROTOBUF_READER_IGNORE_VARINT_BYTE(4) - PROTOBUF_READER_IGNORE_VARINT_BYTE(5) - PROTOBUF_READER_IGNORE_VARINT_BYTE(6) - PROTOBUF_READER_IGNORE_VARINT_BYTE(7) - PROTOBUF_READER_IGNORE_VARINT_BYTE(8) - PROTOBUF_READER_IGNORE_VARINT_BYTE(9) - PROTOBUF_READER_IGNORE_VARINT_BYTE(10) + in.readStrict(c); \ + ++cursor; \ + if constexpr ((byteNo) < 10) \ + { \ + if (likely(!(c & 0x80))) \ + return; \ + } \ + else \ + { \ + if (likely(c == 1)) \ + return; \ + } \ + } while (false) + + PROTOBUF_READER_IGNORE_VARINT_BYTE(1); + PROTOBUF_READER_IGNORE_VARINT_BYTE(2); + PROTOBUF_READER_IGNORE_VARINT_BYTE(3); + PROTOBUF_READER_IGNORE_VARINT_BYTE(4); + PROTOBUF_READER_IGNORE_VARINT_BYTE(5); + PROTOBUF_READER_IGNORE_VARINT_BYTE(6); + PROTOBUF_READER_IGNORE_VARINT_BYTE(7); + PROTOBUF_READER_IGNORE_VARINT_BYTE(8); + PROTOBUF_READER_IGNORE_VARINT_BYTE(9); + PROTOBUF_READER_IGNORE_VARINT_BYTE(10); #undef PROTOBUF_READER_IGNORE_VARINT_BYTE throwUnknownFormat(); @@ -846,7 +855,7 @@ private: std::unique_ptr ProtobufReader::createConverter( \ const google::protobuf::FieldDescriptor * field) \ { \ - return std::make_unique>(simple_reader, field); \ + return std::make_unique>(simple_reader, field); /* NOLINT */ \ } PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int64); PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int64); diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index 488a102e208..bbd942a9b0d 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -132,17 +132,17 @@ inline std::tuple split(const Encoded & combined, uint8_t prec lat.fill(0); lon.fill(0); - uint8_t i = 0; + size_t i = 0; for (; i < precision * BITS_PER_SYMBOL - 1; i += 2) { // longitude is even bits - lon[i/2] = combined[i]; - lat[i/2] = combined[i + 1]; + lon[i / 2] = combined[i]; + lat[i / 2] = combined[i + 1]; } // precision is even, read the last bit as lat. 
if (precision & 0x1) { - lon[i/2] = combined[precision * BITS_PER_SYMBOL - 1]; + lon[i / 2] = combined[precision * BITS_PER_SYMBOL - 1]; } return std::tie(lon, lat); @@ -152,7 +152,7 @@ inline void base32Encode(const Encoded & binary, uint8_t precision, char * out) { extern const char geohash_base32_encode_lookup_table[32]; - for (uint8_t i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL) + for (size_t i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL) { uint8_t v = binary[i]; v <<= 1; diff --git a/dbms/src/Functions/array/arrayUniq.cpp b/dbms/src/Functions/array/arrayUniq.cpp index d5aedb20883..d94efc47970 100644 --- a/dbms/src/Functions/array/arrayUniq.cpp +++ b/dbms/src/Functions/array/arrayUniq.cpp @@ -214,7 +214,7 @@ void FunctionArrayUniq::executeMethodImpl( for (ColumnArray::Offset j = prev_off; j < off; ++j) { if constexpr (has_null_map) - { + { // NOLINT if ((*null_map)[j]) { found_null = true; diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index 46f69530005..f674afbd310 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -79,14 +79,14 @@ private: const char * char_end = char_data + size; if constexpr (mode::trim_left) - { + { // NOLINT const char * found = find_first_not_symbols<' '>(char_data, char_end); size_t num_chars = found - char_data; char_data += num_chars; } if constexpr (mode::trim_right) - { + { // NOLINT const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end); if (found) char_end = found + 1; diff --git a/dbms/src/IO/parseDateTimeBestEffort.cpp b/dbms/src/IO/parseDateTimeBestEffort.cpp index 24d05f73aa0..6e747b13b3f 100644 --- a/dbms/src/IO/parseDateTimeBestEffort.cpp +++ b/dbms/src/IO/parseDateTimeBestEffort.cpp @@ -68,7 +68,7 @@ inline void readDecimalNumber(T & res, const char * src) template inline void readDecimalNumber(T & res, size_t num_digits, const char * src) { -#define READ_DECIMAL_NUMBER(N) res *= common::exp10_i32(N); readDecimalNumber(res, src); src += N; num_digits -= N; break +#define READ_DECIMAL_NUMBER(N) do { res *= common::exp10_i32(N); readDecimalNumber(res, src); src += (N); num_digits -= (N); } while (false) while (num_digits) { @@ -77,7 +77,7 @@ inline void readDecimalNumber(T & res, size_t num_digits, const char * src) case 3: READ_DECIMAL_NUMBER(3); break; case 2: READ_DECIMAL_NUMBER(2); break; case 1: READ_DECIMAL_NUMBER(1); break; - default: READ_DECIMAL_NUMBER(4); + default: READ_DECIMAL_NUMBER(4); break; } } #undef DECIMAL_NUMBER_CASE diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 0ab4949371b..1a40b7cefc3 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -82,8 +82,8 @@ void AggregatedDataVariants::convertToTwoLevel() { #define M(NAME) \ case Type::NAME: \ - NAME ## _two_level = std::make_unique(*NAME); \ - NAME.reset(); \ + NAME ## _two_level = std::make_unique(*(NAME)); \ + (NAME).reset(); \ type = Type::NAME ## _two_level; \ break; diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index f384e005e3c..81a093f4eae 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -102,7 +102,7 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce res.emplace_back(std::move(query_id), std::move(query_user), i, false); } - if (res.empty() && !query_user.empty()) + if (res.empty() && 
!query_user.empty()) // NOLINT throw Exception("User " + my_client.current_user + " attempts to kill query created by " + query_user, ErrorCodes::ACCESS_DENIED); return res; diff --git a/dbms/src/Interpreters/SetVariants.cpp b/dbms/src/Interpreters/SetVariants.cpp index 56f2ff04230..52f54d2442a 100644 --- a/dbms/src/Interpreters/SetVariants.cpp +++ b/dbms/src/Interpreters/SetVariants.cpp @@ -23,7 +23,7 @@ void SetVariantsTemplate::init(Type type_) case Type::EMPTY: break; #define M(NAME) \ - case Type::NAME: NAME = std::make_unique(); break; + case Type::NAME: (NAME) = std::make_unique(); break; APPLY_FOR_SET_VARIANTS(M) #undef M } @@ -37,7 +37,7 @@ size_t SetVariantsTemplate::getTotalRowCount() const case Type::EMPTY: return 0; #define M(NAME) \ - case Type::NAME: return NAME->data.size(); + case Type::NAME: return (NAME)->data.size(); APPLY_FOR_SET_VARIANTS(M) #undef M } @@ -53,7 +53,7 @@ size_t SetVariantsTemplate::getTotalByteCount() const case Type::EMPTY: return 0; #define M(NAME) \ - case Type::NAME: return NAME->data.getBufferSizeInBytes(); + case Type::NAME: return (NAME)->data.getBufferSizeInBytes(); APPLY_FOR_SET_VARIANTS(M) #undef M } diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp index 32b723c1187..8e13ee46e6d 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp @@ -54,16 +54,16 @@ struct STRUCT : public StringRef {}; \ namespace ZeroTraits \ { \ template <> \ - inline bool check(STRUCT x) { return 0 == x.size; } \ + inline bool check(STRUCT x) { return 0 == x.size; } /* NOLINT */ \ \ template <> \ - inline void set(STRUCT & x) { x.size = 0; } \ + inline void set(STRUCT & x) { x.size = 0; } /* NOLINT */ \ } \ \ template <> \ struct DefaultHash \ { \ - size_t operator() (STRUCT x) const \ + size_t operator() (STRUCT x) const /* NOLINT */ \ { \ return CityHash_v1_0_2::CityHash64(x.data, x.size); \ } \ diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index 62ed0584d3f..cc21129a6a6 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -57,10 +57,10 @@ struct STRUCT : public StringRef {}; \ namespace ZeroTraits \ { \ template <> \ - inline bool check(STRUCT x) { return nullptr == x.data; } \ + inline bool check(STRUCT x) { return nullptr == x.data; } /* NOLINT */ \ \ template <> \ - inline void set(STRUCT & x) { x.data = nullptr; } \ + inline void set(STRUCT & x) { x.data = nullptr; } /* NOLINT */ \ } \ \ template <> \ diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index b085f5a28ae..60cb0475be7 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -11,7 +11,7 @@ do \ { \ if (member) \ { \ - res->member = member->clone(); \ + res->member = (member)->clone(); \ res->children.push_back(res->member); \ } \ } \ diff --git a/dbms/src/Parsers/CommonParsers.cpp b/dbms/src/Parsers/CommonParsers.cpp index ddbf1b17966..47868f5df48 100644 --- a/dbms/src/Parsers/CommonParsers.cpp +++ b/dbms/src/Parsers/CommonParsers.cpp @@ -50,7 +50,7 @@ bool ParserKeyword::parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected) if (word_length != pos->size()) return false; - if (strncasecmp(pos->begin, current_word, word_length)) + if (0 != strncasecmp(pos->begin, current_word, word_length)) return false; ++pos; diff --git 
a/dbms/src/Processors/ForkProcessor.cpp b/dbms/src/Processors/ForkProcessor.cpp index 913e7c2d1c7..7fa21c4236d 100644 --- a/dbms/src/Processors/ForkProcessor.cpp +++ b/dbms/src/Processors/ForkProcessor.cpp @@ -65,7 +65,7 @@ ForkProcessor::Status ForkProcessor::prepare() { ++num_processed_outputs; if (num_processed_outputs == num_active_outputs) - output.push(std::move(data)); /// Can push because no full or unneeded outputs. + output.push(std::move(data)); // NOLINT Can push because no full or unneeded outputs. else output.push(data.clone()); } diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 1c988840abf..002f072f004 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -431,7 +431,7 @@ void registerStorageKafka(StorageFactory & factory) // Check arguments and settings #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME) \ /* One of the four required arguments is not specified */ \ - if (args_count < ARG_NUM && ARG_NUM <= 4 && \ + if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && \ !kafka_settings.PAR_NAME.changed) \ { \ throw Exception( \ @@ -442,7 +442,7 @@ void registerStorageKafka(StorageFactory & factory) /* The same argument is given in two places */ \ if (has_settings && \ kafka_settings.PAR_NAME.changed && \ - args_count >= ARG_NUM) \ + args_count >= (ARG_NUM)) \ { \ throw Exception( \ "The argument №" #ARG_NUM " of storage Kafka " \ diff --git a/dbms/src/Storages/LiveView/StorageLiveView.cpp b/dbms/src/Storages/LiveView/StorageLiveView.cpp index 1faa5e04dc9..29eb896bcee 100644 --- a/dbms/src/Storages/LiveView/StorageLiveView.cpp +++ b/dbms/src/Storages/LiveView/StorageLiveView.cpp @@ -323,7 +323,7 @@ ASTPtr StorageLiveView::getInnerBlocksQuery() /// Rewrite inner query with right aliases for JOIN. /// It cannot be done in constructor or startup() because InterpreterSelectQuery may access table, /// which is not loaded yet during server startup, so we do it lazily - InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze()); + InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze()); // NOLINT auto table_id = getStorageID(); extractDependentTable(inner_blocks_query, global_context, table_id.table_name, inner_subquery); } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index c74ca41f054..e994d254958 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -516,7 +516,7 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo * - in this case `n - 1` elements are added (where `n` is the number of arguments). 
*/
if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
- rpn.emplace_back(std::move(element));
+ rpn.emplace_back(element);
}
return;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 2b02aad8970..0b87b241d85 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1091,8 +1091,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
{
auto merged_processor = std::make_shared(header, pipes.size(), sort_description, max_block_size);
- pipes.emplace_back(std::move(pipes), std::move(merged_processor));
- break;
+ Pipe pipe(std::move(pipes), std::move(merged_processor));
+ pipes = Pipes();
+ pipes.emplace_back(std::move(pipe));
+ return pipes;
}
case MergeTreeData::MergingParams::Collapsing:
diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp
index 93f5ff20045..5c4113c1565 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp
@@ -70,7 +70,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def)
if (std::find_if(changes.begin(), changes.end(), \
[](const SettingChange & c) { return c.name == #NAME; }) \
== changes.end()) \
- changes.push_back(SettingChange{#NAME, NAME.value});
+ changes.push_back(SettingChange{#NAME, (NAME).value});
APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(ADD_IF_ABSENT)
#undef ADD_IF_ABSENT
diff --git a/dbms/src/Storages/StorageInMemoryMetadata.cpp b/dbms/src/Storages/StorageInMemoryMetadata.cpp
index 86c6551896a..a05872234de 100644
--- a/dbms/src/Storages/StorageInMemoryMetadata.cpp
+++ b/dbms/src/Storages/StorageInMemoryMetadata.cpp
@@ -35,11 +35,13 @@ StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata &
StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemoryMetadata & other)
{
+ if (this == &other)
+ return *this;
+
columns = other.columns;
indices = other.indices;
constraints = other.constraints;
-
if (other.partition_by_ast)
partition_by_ast = other.partition_by_ast->clone();
else

From 93466ce0971f5aa92e3eb2b5ce79af4ec8c74139 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 18 Mar 2020 06:27:32 +0300
Subject: [PATCH 069/115] Added even more clang-tidy checks

---
 .clang-tidy | 37 ++++-
 base/common/JSON.cpp | 2 +-
 base/common/getMemoryAmount.cpp | 8 +-
 base/common/tests/date_lut2.cpp | 2 +-
 base/common/tests/date_lut3.cpp | 2 +-
 base/common/tests/date_lut4.cpp | 2 +-
 .../tests/date_lut_default_timezone.cpp | 2 +-
 base/common/tests/date_lut_init.cpp | 2 +-
 base/common/tests/gtest_strong_typedef.cpp | 2 +-
 base/daemon/BaseDaemon.cpp | 6 +-
 dbms/programs/copier/ClusterCopier.cpp | 2 +-
 dbms/programs/copier/ClusterCopier.h | 3 +-
 dbms/programs/obfuscator/Obfuscator.cpp | 16 +-
 .../performance-test/PerformanceTest.cpp | 2 -
 .../performance-test/ReportBuilder.cpp | 2 +-
 dbms/programs/performance-test/TestStats.cpp | 4 +-
 .../AggregateFunctionGroupUniqArray.cpp | 6 +-
 dbms/src/Client/ConnectionPoolWithFailover.h | 4 +-
 dbms/src/Client/MultiplexedConnections.cpp | 6 +-
 dbms/src/Common/Config/ConfigProcessor.cpp | 2 +-
 dbms/src/Common/DNSResolver.cpp | 4 +-
 dbms/src/Common/Exception.cpp | 6 +-
 dbms/src/Common/OpenSSLHelpers.cpp | 2 +-
 dbms/src/Common/ProfileEvents.h | 2 +-
 dbms/src/Common/SymbolIndex.cpp | 4 +-
 dbms/src/Common/ThreadPool.cpp | 4 +-
 dbms/src/Common/ThreadPool.h | 2 +-
 dbms/src/Common/ZooKeeper/TestKeeper.cpp | 10 +-
 dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 10 +-
 .../ZooKeeper/tests/zkutil_test_async.cpp | 1 +
 dbms/src/Common/tests/CMakeLists.txt | 3 -
 dbms/src/Common/tests/auto_array.cpp | 9 +-
 dbms/src/Common/tests/cow_columns.cpp | 2 +-
 dbms/src/Common/tests/cow_compositions.cpp | 4 +-
 .../tests/gtest_sensitive_data_masker.cpp | 2 +-
 dbms/src/Common/tests/int_hashes_perf.cpp | 4 +-
 .../tests/integer_hash_tables_and_hashes.cpp | 4 +-
 .../src/Common/tests/parallel_aggregation.cpp | 6 +-
 .../Common/tests/parallel_aggregation2.cpp | 6 +-
 dbms/src/Common/tests/radix_sort.cpp | 5 +-
 dbms/src/Common/tests/sip_hash.cpp | 154 ------------------
 .../Compression/CompressionCodecMultiple.cpp | 2 +-
 .../tests/gtest_compressionCodec.cpp | 10 +-
 dbms/src/Core/tests/string_pool.cpp | 2 +-
 .../AddingDefaultsBlockInputStream.cpp | 2 +-
 .../tests/finish_sorting_stream.cpp | 10 +-
 dbms/src/DataTypes/DataTypeLowCardinality.cpp | 2 +-
 dbms/src/Databases/DatabaseDictionary.h | 2 +-
 dbms/src/Databases/DatabaseLazy.h | 2 +-
 dbms/src/Databases/DatabaseMySQL.h | 2 +-
 dbms/src/Databases/DatabaseWithDictionaries.h | 4 +-
 dbms/src/Databases/DatabasesCommon.h | 2 +-
 .../Dictionaries/RedisBlockInputStream.cpp | 34 ++--
 dbms/src/Disks/DiskLocal.h | 16 +-
 dbms/src/Disks/DiskMemory.h | 16 +-
 dbms/src/Disks/DiskS3.cpp | 23 ++-
 dbms/src/Disks/DiskS3.h | 16 +-
 dbms/src/Formats/ProtobufSchemas.cpp | 2 +-
 dbms/src/Functions/CRC.cpp | 2 +-
 dbms/src/Functions/FunctionHelpers.cpp | 2 +-
 dbms/src/Functions/FunctionsLogical.cpp | 8 +-
 dbms/src/Functions/FunctionsStringRegex.cpp | 8 +-
 dbms/src/Functions/GeoUtils.cpp | 4 +-
 dbms/src/Functions/array/array.cpp | 2 +-
 dbms/src/Functions/array/arrayConcat.cpp | 2 +-
 dbms/src/Functions/array/arrayElement.cpp | 2 +-
 dbms/src/Functions/array/arrayIntersect.cpp | 2 +-
 dbms/src/Functions/array/arrayPushBack.cpp | 2 +-
 dbms/src/Functions/array/arrayPushFront.cpp | 2 +-
 dbms/src/Functions/array/arrayResize.cpp | 2 +-
 dbms/src/Functions/array/arraySort.cpp | 2 +-
 dbms/src/Functions/array/hasAll.cpp | 2 +-
 dbms/src/Functions/array/hasAny.cpp | 2 +-
 dbms/src/Functions/array/range.cpp | 2 +-
 dbms/src/Functions/bitCount.cpp | 2 +-
 dbms/src/Functions/caseWithExpression.cpp | 2 +-
 dbms/src/Functions/coalesce.cpp | 2 +-
 dbms/src/Functions/concat.cpp | 2 +-
 dbms/src/Functions/evalMLMethod.cpp | 2 +-
 dbms/src/Functions/formatDateTime.cpp | 2 +-
 dbms/src/Functions/getMacro.cpp | 2 +-
 dbms/src/Functions/getScalar.cpp | 2 +-
 dbms/src/Functions/if.cpp | 2 +-
 dbms/src/Functions/ifNotFinite.cpp | 2 +-
 dbms/src/Functions/ifNull.cpp | 2 +-
 dbms/src/Functions/multiIf.cpp | 2 +-
 dbms/src/Functions/neighbor.cpp | 2 +-
 dbms/src/Functions/nullIf.cpp | 2 +-
 dbms/src/IO/AIO.cpp | 4 +-
 dbms/src/IO/ReadBufferFromHDFS.cpp | 2 +-
 dbms/src/IO/ReadHelpers.cpp | 6 +-
 dbms/src/IO/S3Common.cpp | 2 +-
 dbms/src/IO/WriteBufferFromHDFS.cpp | 2 +-
 .../tests/gtest_aio_seek_back_after_eof.cpp | 4 +-
 dbms/src/IO/tests/gtest_bit_io.cpp | 4 +-
 dbms/src/IO/tests/hashing_read_buffer.cpp | 6 +-
 dbms/src/IO/tests/hashing_write_buffer.cpp | 17 +-
 dbms/src/IO/tests/parse_int_perf.cpp | 2 +-
 dbms/src/IO/tests/ryu_test.cpp | 4 +-
 dbms/src/IO/tests/valid_utf8_perf.cpp | 2 +-
 dbms/src/Interpreters/ActionsVisitor.cpp | 9 +-
 dbms/src/Interpreters/CatBoostModel.cpp | 2 +-
 .../Interpreters/CrossToInnerJoinVisitor.cpp | 2 +-
 dbms/src/Interpreters/DDLWorker.cpp | 2 +-
 dbms/src/Interpreters/ExpressionJIT.cpp | 2 +-
 dbms/src/Interpreters/Join.h | 2 +-
 dbms/src/Interpreters/JoinSwitcher.h | 2 +-
 .../JoinToSubqueryTransformVisitor.cpp | 2 +-
 dbms/src/Interpreters/MergeJoin.h | 2 +-
 dbms/src/Interpreters/QueryNormalizer.cpp | 4 +-
 dbms/src/Interpreters/SyntaxAnalyzer.cpp | 1 +
 .../TranslateQualifiedNamesVisitor.cpp | 2 +-
 dbms/src/Interpreters/tests/aggregate.cpp | 2 +-
 dbms/src/Interpreters/tests/expression.cpp | 2 +-
 dbms/src/Interpreters/tests/hash_map.cpp | 16 +-
 .../Interpreters/tests/hash_map_lookup.cpp | 4 +-
 .../Interpreters/tests/hash_map_string.cpp | 4 +-
 .../Interpreters/tests/hash_map_string_2.cpp | 4 +-
 .../Interpreters/tests/hash_map_string_3.cpp | 4 +-
 .../tests/hash_map_string_small.cpp | 4 +-
 .../Interpreters/tests/string_hash_map.cpp | 4 +-
 .../Interpreters/tests/two_level_hash_map.cpp | 2 +-
 .../Formats/Impl/AvroRowInputFormat.cpp | 6 +-
 .../Formats/Impl/AvroRowOutputFormat.cpp | 2 +-
 dbms/src/Processors/tests/processors_test.cpp | 2 +-
 .../tests/processors_test_chain.cpp | 2 +-
 .../tests/processors_test_expand_pipeline.cpp | 4 +-
 .../tests/processors_test_merge.cpp | 2 +-
 ...ocessors_test_merging_sorted_transform.cpp | 2 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 7 +-
 .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 +-
 .../MergeTree/MergeTreeDataPartCompact.h | 6 +-
 .../MergeTree/MergeTreeDataPartWide.h | 6 +-
 .../MergeTreeDataPartWriterCompact.h | 6 +-
 .../MergeTree/MergeTreeDataPartWriterWide.h | 6 +-
 .../MergeTree/MergeTreePartsMover.cpp | 2 +-
 dbms/src/Storages/StorageDistributed.cpp | 2 +-
 dbms/src/Storages/StorageJoin.cpp | 2 +-
 .../TableFunctions/TableFunctionRemote.cpp | 1 +
 139 files changed, 334 insertions(+), 436 deletions(-)
 delete mode 100644 dbms/src/Common/tests/sip_hash.cpp

diff --git a/.clang-tidy b/.clang-tidy
index 7dd495237a7..d906af2f9e3 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1,9 +1,14 @@
Checks: '-*,
- google-readability-avoid-underscore-in-googletest-name,
- misc-throw-by-value-catch-by-reference,
misc-misplaced-const,
misc-unconventional-assign-operator,
+ misc-redundant-expression,
+ misc-static-assert,
+ misc-unconventional-assign-operator,
+ misc-uniqueptr-reset-release,
+ misc-unused-alias-decls,
+ misc-unused-parameters,
+ misc-unused-using-decls,
modernize-avoid-bind,
modernize-loop-convert,
@@ -21,6 +26,13 @@ Checks: '-*,
performance-faster-string-find,
performance-for-range-copy,
+ performance-implicit-conversion-in-loop,
+ performance-inefficient-algorithm,
+ performance-inefficient-vector-operation,
+ performance-move-constructor-init,
+ performance-no-automatic-move,
+ performance-trivially-destructible,
+ performance-unnecessary-copy-initialization,
readability-avoid-const-params-in-decls,
readability-const-return-type,
@@ -90,6 +102,27 @@ Checks: '-*,
bugprone-use-after-move,
bugprone-virtual-near-miss,
+ cert-dcl21-cpp,
+ cert-dcl50-cpp,
+ cert-env33-c,
+ cert-err34-c,
+ cert-err52-cpp,
+ cert-flp30-c,
+ cert-mem57-cpp,
+ cert-msc50-cpp,
+ cert-oop58-cpp,
+
+ google-build-explicit-make-pair,
+ google-build-namespaces,
+ google-default-arguments,
+ google-explicit-constructor,
+ google-readability-casting,
+ google-readability-avoid-underscore-in-googletest-name,
+ google-runtime-int,
+ google-runtime-operator,
+
+ hicpp-exception-baseclass,
+
boost-use-to-string,
'
WarningsAsErrors: '*'
diff --git a/base/common/JSON.cpp b/base/common/JSON.cpp
index 92de0dc2d25..209616b3795 100644
--- a/base/common/JSON.cpp
+++ b/base/common/JSON.cpp
@@ -776,7 +776,7 @@ JSON::iterator & JSON::iterator::operator++()
return *this;
}
-JSON::iterator JSON::iterator::operator++(int)
+JSON::iterator JSON::iterator::operator++(int) // 
NOLINT { iterator copy(*this); ++*this; diff --git a/base/common/getMemoryAmount.cpp b/base/common/getMemoryAmount.cpp index d1a7907b152..5e600a37351 100644 --- a/base/common/getMemoryAmount.cpp +++ b/base/common/getMemoryAmount.cpp @@ -69,13 +69,13 @@ uint64_t getMemoryAmountOrZero() #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */ - return (uint64_t)sysconf(_SC_PHYS_PAGES) - * (uint64_t)sysconf(_SC_PAGESIZE); + return uint64_t(sysconf(_SC_PHYS_PAGES)) + *uint64_t(sysconf(_SC_PAGESIZE)); #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE) /* Legacy. -------------------------------------------------- */ - return (uint64_t)sysconf(_SC_PHYS_PAGES) - * (uint64_t)sysconf(_SC_PAGE_SIZE); + return uint64_t(sysconf(_SC_PHYS_PAGES)) + * uint64_t(sysconf(_SC_PAGE_SIZE)); #elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM)) /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */ diff --git a/base/common/tests/date_lut2.cpp b/base/common/tests/date_lut2.cpp index f1a106a16ca..6dcf5e8adf2 100644 --- a/base/common/tests/date_lut2.cpp +++ b/base/common/tests/date_lut2.cpp @@ -43,7 +43,7 @@ void loop(time_t begin, time_t end, int step) } -int main(int argc, char ** argv) +int main(int, char **) { loop(orderedIdentifierToDate(20101031), orderedIdentifierToDate(20101101), 15 * 60); loop(orderedIdentifierToDate(20100328), orderedIdentifierToDate(20100330), 15 * 60); diff --git a/base/common/tests/date_lut3.cpp b/base/common/tests/date_lut3.cpp index c2e4c7ccf8d..411765d2b2a 100644 --- a/base/common/tests/date_lut3.cpp +++ b/base/common/tests/date_lut3.cpp @@ -53,7 +53,7 @@ void loop(time_t begin, time_t end, int step) } -int main(int argc, char ** argv) +int main(int, char **) { loop(orderedIdentifierToDate(20101031), orderedIdentifierToDate(20101101), 15 * 60); loop(orderedIdentifierToDate(20100328), orderedIdentifierToDate(20100330), 15 * 60); diff --git a/base/common/tests/date_lut4.cpp b/base/common/tests/date_lut4.cpp index 50c3ef4e3d3..a82ec25f183 100644 --- a/base/common/tests/date_lut4.cpp +++ b/base/common/tests/date_lut4.cpp @@ -2,7 +2,7 @@ #include -int main(int argc, char ** argv) +int main(int, char **) { /** В DateLUT был глюк - для времён из дня 1970-01-01, возвращался номер часа больше 23. */ static const time_t TIME = 66130; diff --git a/base/common/tests/date_lut_default_timezone.cpp b/base/common/tests/date_lut_default_timezone.cpp index aeefae3c9e0..b8e5aa08931 100644 --- a/base/common/tests/date_lut_default_timezone.cpp +++ b/base/common/tests/date_lut_default_timezone.cpp @@ -2,7 +2,7 @@ #include #include -int main(int argc, char ** argv) +int main(int, char **) { try { diff --git a/base/common/tests/date_lut_init.cpp b/base/common/tests/date_lut_init.cpp index 3b03e36b02d..48f0d6063c7 100644 --- a/base/common/tests/date_lut_init.cpp +++ b/base/common/tests/date_lut_init.cpp @@ -1,7 +1,7 @@ #include /// Позволяет проверить время инициализации DateLUT. 
-int main(int argc, char ** argv) +int main(int, char **) { DateLUT::instance(); return 0; diff --git a/base/common/tests/gtest_strong_typedef.cpp b/base/common/tests/gtest_strong_typedef.cpp index 6bf2f1eaad1..8ee9535ce81 100644 --- a/base/common/tests/gtest_strong_typedef.cpp +++ b/base/common/tests/gtest_strong_typedef.cpp @@ -54,7 +54,7 @@ TEST(StrongTypedefSuite, NoDefaultCtor) { struct NoDefaultCtor { - NoDefaultCtor(int i) {} + NoDefaultCtor(int) {} // NOLINT }; STRONG_TYPEDEF(NoDefaultCtor, MyStruct); diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 41c705f41c2..0434a2c2fb7 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -99,12 +99,12 @@ static void writeSignalIDtoSignalPipe(int sig) } /** Signal handler for HUP / USR1 */ -static void closeLogsSignalHandler(int sig, siginfo_t * info, void * context) +static void closeLogsSignalHandler(int sig, siginfo_t *, void *) { writeSignalIDtoSignalPipe(sig); } -static void terminateRequestedSignalHandler(int sig, siginfo_t * info, void * context) +static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) { writeSignalIDtoSignalPipe(sig); } @@ -404,7 +404,7 @@ std::string instructionFailToString(InstructionFail fail) sigjmp_buf jmpbuf; -void sigIllCheckHandler(int sig, siginfo_t * info, void * context) +void sigIllCheckHandler(int, siginfo_t *, void *) { siglongjmp(jmpbuf, 1); } diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index d59a895af41..4431362913d 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1181,7 +1181,7 @@ String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, C ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard) { /// Fetch and parse (possibly) new definition - auto connection_entry = task_shard.info.pool->get(timeouts, &task_cluster->settings_pull); + auto connection_entry = task_shard.info.pool->get(timeouts, &task_cluster->settings_pull, true); String create_query_pull_str = getRemoteCreateTable( task_shard.task_table.table_pull, *connection_entry, diff --git a/dbms/programs/copier/ClusterCopier.h b/dbms/programs/copier/ClusterCopier.h index cdb06185992..2b7b4fff9e2 100644 --- a/dbms/programs/copier/ClusterCopier.h +++ b/dbms/programs/copier/ClusterCopier.h @@ -114,8 +114,7 @@ protected: Connection & connection, const Settings * settings = nullptr); - ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard); + ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard); void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index a92a0d03287..0352eba0a0a 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -181,7 +181,7 @@ private: UInt64 seed; public: - UnsignedIntegerModel(UInt64 seed_) : seed(seed_) {} + explicit UnsignedIntegerModel(UInt64 seed_) : seed(seed_) {} void train(const IColumn &) override {} void finalize() override {} @@ -222,7 +222,7 @@ private: UInt64 seed; public: - SignedIntegerModel(UInt64 seed_) : seed(seed_) {} + explicit SignedIntegerModel(UInt64 seed_) : seed(seed_) {} void train(const IColumn &) override {} void finalize() override {} @@ -271,7 +271,7 @@ private: Float res_prev_value = 0; public: - 
FloatModel(UInt64 seed_) : seed(seed_) {} + explicit FloatModel(UInt64 seed_) : seed(seed_) {} void train(const IColumn &) override {} void finalize() override {} @@ -372,7 +372,7 @@ private: UInt64 seed; public: - FixedStringModel(UInt64 seed_) : seed(seed_) {} + explicit FixedStringModel(UInt64 seed_) : seed(seed_) {} void train(const IColumn &) override {} void finalize() override {} @@ -414,7 +414,7 @@ private: const DateLUTImpl & date_lut; public: - DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::instance()) {} + explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::instance()) {} void train(const IColumn &) override {} void finalize() override {} @@ -567,7 +567,7 @@ private: } public: - MarkovModel(MarkovModelParameters params_) + explicit MarkovModel(MarkovModelParameters params_) : params(std::move(params_)), code_points(params.order, BEGIN) {} void consume(const char * data, size_t size) @@ -836,7 +836,7 @@ private: ModelPtr nested_model; public: - ArrayModel(ModelPtr nested_model_) : nested_model(std::move(nested_model_)) {} + explicit ArrayModel(ModelPtr nested_model_) : nested_model(std::move(nested_model_)) {} void train(const IColumn & column) override { @@ -874,7 +874,7 @@ private: ModelPtr nested_model; public: - NullableModel(ModelPtr nested_model_) : nested_model(std::move(nested_model_)) {} + explicit NullableModel(ModelPtr nested_model_) : nested_model(std::move(nested_model_)) {} void train(const IColumn & column) override { diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index c71760a1e58..53adab75fc4 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -52,8 +52,6 @@ void waitQuery(Connection & connection) } } -namespace fs = std::filesystem; - PerformanceTest::PerformanceTest( const XMLConfigurationPtr & config_, Connection & connection_, diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index 0bb4f3fdb6f..87f2e759f95 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -119,7 +119,7 @@ std::string ReportBuilder::buildFullReport( if (statistics.sampler.size() != 0) { JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) + for (int percent = 10; percent <= 90; percent += 10) { std::string quantile_key = std::to_string(percent / 100.0); while (quantile_key.back() == '0') diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index 5268f8bb328..024130ce4d2 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++ b/dbms/programs/performance-test/TestStats.cpp @@ -17,9 +17,9 @@ std::string TestStats::getStatisticByName(const std::string & statistic_name) { std::string result = "\n"; - for (double percent = 10; percent <= 90; percent += 10) + for (int percent = 10; percent <= 90; percent += 10) { - result += FOUR_SPACES + std::to_string((percent / 100)); + result += FOUR_SPACES + std::to_string((percent / 100.0)); result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); result += "\n"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index e7d6ea2528c..64a8200aa52 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp 
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -25,7 +25,8 @@ template class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray { public: - AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits::max()) : AggregateFunctionGroupUniqArray(argument_type, max_elems_) {} + explicit AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits::max()) + : AggregateFunctionGroupUniqArray(argument_type, max_elems_) {} DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } }; @@ -33,7 +34,8 @@ template class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUniqArray { public: - AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits::max()) : AggregateFunctionGroupUniqArray(argument_type, max_elems_) {} + explicit AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits::max()) + : AggregateFunctionGroupUniqArray(argument_type, max_elems_) {} DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } }; diff --git a/dbms/src/Client/ConnectionPoolWithFailover.h b/dbms/src/Client/ConnectionPoolWithFailover.h index 78073b52108..bdc06656ff1 100644 --- a/dbms/src/Client/ConnectionPoolWithFailover.h +++ b/dbms/src/Client/ConnectionPoolWithFailover.h @@ -44,8 +44,8 @@ public: /** Allocates connection to work. */ Entry get(const ConnectionTimeouts & timeouts, - const Settings * settings = nullptr, - bool force_connected = true) override; /// From IConnectionPool + const Settings * settings, + bool force_connected) override; /// From IConnectionPool /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp index 9bc9303c1c5..b0ff2104ab1 100644 --- a/dbms/src/Client/MultiplexedConnections.cpp +++ b/dbms/src/Client/MultiplexedConnections.cpp @@ -1,5 +1,7 @@ #include #include +#include + namespace DB { @@ -308,10 +310,10 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead throw Exception("Timeout exceeded while reading from " + dumpAddressesUnlocked(), ErrorCodes::TIMEOUT_EXCEEDED); } - /// TODO Absolutely wrong code: read_list could be empty; rand() is not thread safe and has low quality; motivation of rand is unclear. + /// TODO Absolutely wrong code: read_list could be empty; motivation of rand is unclear. /// This code path is disabled by default. 
- auto & socket = read_list[rand() % read_list.size()]; + auto & socket = read_list[thread_local_rng() % read_list.size()]; if (fd_to_replica_state_idx.empty()) { fd_to_replica_state_idx.reserve(replica_states.size()); diff --git a/dbms/src/Common/Config/ConfigProcessor.cpp b/dbms/src/Common/Config/ConfigProcessor.cpp index 0213e2abe90..7c39518d30b 100644 --- a/dbms/src/Common/Config/ConfigProcessor.cpp +++ b/dbms/src/Common/Config/ConfigProcessor.cpp @@ -294,7 +294,7 @@ void ConfigProcessor::doIncludesRecursive( auto process_include = [&](const Node * include_attr, const std::function & get_node, const char * error_msg) { - std::string name = include_attr->getNodeValue(); + const std::string & name = include_attr->getNodeValue(); const Node * node_to_include = get_node(name); if (!node_to_include) { diff --git a/dbms/src/Common/DNSResolver.cpp b/dbms/src/Common/DNSResolver.cpp index 2bf35ca417a..b0b9c61bc84 100644 --- a/dbms/src/Common/DNSResolver.cpp +++ b/dbms/src/Common/DNSResolver.cpp @@ -170,8 +170,8 @@ bool DNSResolver::updateCache() { { std::lock_guard lock(impl->drop_mutex); - for (auto & host : impl->new_hosts) - impl->known_hosts.insert(std::move(host)); + for (const auto & host : impl->new_hosts) + impl->known_hosts.insert(host); impl->new_hosts.clear(); impl->host_name.emplace(Poco::Net::DNS::hostName()); diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 97219379ee8..bc3d4a78969 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -273,7 +273,7 @@ void tryLogException(std::exception_ptr e, const char * log_name, const std::str { try { - std::rethrow_exception(std::move(e)); + std::rethrow_exception(std::move(e)); // NOLINT } catch (...) { @@ -285,7 +285,7 @@ void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::str { try { - std::rethrow_exception(std::move(e)); + std::rethrow_exception(std::move(e)); // NOLINT } catch (...) { @@ -327,7 +327,7 @@ std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace) { try { - std::rethrow_exception(std::move(e)); + std::rethrow_exception(std::move(e)); // NOLINT } catch (...) 
{ diff --git a/dbms/src/Common/OpenSSLHelpers.cpp b/dbms/src/Common/OpenSSLHelpers.cpp index d482534ef24..caa843f96f6 100644 --- a/dbms/src/Common/OpenSSLHelpers.cpp +++ b/dbms/src/Common/OpenSSLHelpers.cpp @@ -23,7 +23,7 @@ String getOpenSSLErrors() SCOPE_EXIT(BIO_free(mem)); ERR_print_errors(mem); char * buf = nullptr; - long size = BIO_get_mem_data(mem, &buf); + size_t size = BIO_get_mem_data(mem, &buf); return String(buf, size); } diff --git a/dbms/src/Common/ProfileEvents.h b/dbms/src/Common/ProfileEvents.h index ca327c9810b..e1b68e43e52 100644 --- a/dbms/src/Common/ProfileEvents.h +++ b/dbms/src/Common/ProfileEvents.h @@ -36,7 +36,7 @@ namespace ProfileEvents Counters(VariableContext level_ = VariableContext::Thread, Counters * parent_ = &global_counters); /// Global level static initializer - Counters(Counter * allocated_counters) + Counters(Counter * allocated_counters) noexcept : counters(allocated_counters), parent(nullptr), level(VariableContext::Global) {} Counter & operator[] (Event event) diff --git a/dbms/src/Common/SymbolIndex.cpp b/dbms/src/Common/SymbolIndex.cpp index a9cdc1fa867..a5c0835fe9c 100644 --- a/dbms/src/Common/SymbolIndex.cpp +++ b/dbms/src/Common/SymbolIndex.cpp @@ -186,7 +186,7 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, symbol.address_begin = reinterpret_cast(info->dlpi_addr + elf_sym[sym_index].st_value); symbol.address_end = reinterpret_cast(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size); symbol.name = sym_name; - symbols.push_back(std::move(symbol)); + symbols.push_back(symbol); } break; @@ -227,7 +227,7 @@ void collectSymbolsFromELFSymbolTable( symbol.address_begin = reinterpret_cast(info->dlpi_addr + symbol_table_entry->st_value); symbol.address_end = reinterpret_cast(info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size); symbol.name = symbol_name; - symbols.push_back(std::move(symbol)); + symbols.push_back(symbol); } } diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index 7334188952c..7911cab5f80 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -257,11 +257,11 @@ template class ThreadPoolImpl; template class ThreadPoolImpl; -void ExceptionHandler::setException(std::exception_ptr && exception) +void ExceptionHandler::setException(std::exception_ptr exception) { std::unique_lock lock(mutex); if (!first_exception) - first_exception = std::move(exception); + first_exception = std::move(exception); // NOLINT } void ExceptionHandler::throwIfException() diff --git a/dbms/src/Common/ThreadPool.h b/dbms/src/Common/ThreadPool.h index 662d34afadd..8a33245d445 100644 --- a/dbms/src/Common/ThreadPool.h +++ b/dbms/src/Common/ThreadPool.h @@ -221,7 +221,7 @@ using ThreadPool = ThreadPoolImpl; class ExceptionHandler { public: - void setException(std::exception_ptr && exception); + void setException(std::exception_ptr exception); void throwIfException(); private: diff --git a/dbms/src/Common/ZooKeeper/TestKeeper.cpp b/dbms/src/Common/ZooKeeper/TestKeeper.cpp index cb53ae52cc3..0bcef6cd75f 100644 --- a/dbms/src/Common/ZooKeeper/TestKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/TestKeeper.cpp @@ -67,7 +67,7 @@ static void processWatchesImpl(const String & path, TestKeeper::Watches & watche struct TestKeeperCreateRequest final : CreateRequest, TestKeeperRequest { TestKeeperCreateRequest() = default; - TestKeeperCreateRequest(const CreateRequest & base) : CreateRequest(base) {} + explicit TestKeeperCreateRequest(const CreateRequest & base) : 
CreateRequest(base) {} ResponsePtr createResponse() const override; ResponsePtr process(TestKeeper::Container & container, int64_t zxid) const override; @@ -80,7 +80,7 @@ struct TestKeeperCreateRequest final : CreateRequest, TestKeeperRequest struct TestKeeperRemoveRequest final : RemoveRequest, TestKeeperRequest { TestKeeperRemoveRequest() = default; - TestKeeperRemoveRequest(const RemoveRequest & base) : RemoveRequest(base) {} + explicit TestKeeperRemoveRequest(const RemoveRequest & base) : RemoveRequest(base) {} bool isMutable() const override { return true; } ResponsePtr createResponse() const override; ResponsePtr process(TestKeeper::Container & container, int64_t zxid) const override; @@ -107,7 +107,7 @@ struct TestKeeperGetRequest final : GetRequest, TestKeeperRequest struct TestKeeperSetRequest final : SetRequest, TestKeeperRequest { TestKeeperSetRequest() = default; - TestKeeperSetRequest(const SetRequest & base) : SetRequest(base) {} + explicit TestKeeperSetRequest(const SetRequest & base) : SetRequest(base) {} bool isMutable() const override { return true; } ResponsePtr createResponse() const override; ResponsePtr process(TestKeeper::Container & container, int64_t zxid) const override; @@ -127,14 +127,14 @@ struct TestKeeperListRequest final : ListRequest, TestKeeperRequest struct TestKeeperCheckRequest final : CheckRequest, TestKeeperRequest { TestKeeperCheckRequest() = default; - TestKeeperCheckRequest(const CheckRequest & base) : CheckRequest(base) {} + explicit TestKeeperCheckRequest(const CheckRequest & base) : CheckRequest(base) {} ResponsePtr createResponse() const override; ResponsePtr process(TestKeeper::Container & container, int64_t zxid) const override; }; struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest { - TestKeeperMultiRequest(const Requests & generic_requests) + explicit TestKeeperMultiRequest(const Requests & generic_requests) { requests.reserve(generic_requests.size()); diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0525ebd377b..c55a2738a2a 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -481,7 +481,7 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse struct ZooKeeperCreateRequest final : CreateRequest, ZooKeeperRequest { ZooKeeperCreateRequest() = default; - ZooKeeperCreateRequest(const CreateRequest & base) : CreateRequest(base) {} + explicit ZooKeeperCreateRequest(const CreateRequest & base) : CreateRequest(base) {} ZooKeeper::OpNum getOpNum() const override { return 1; } void writeImpl(WriteBuffer & out) const override @@ -513,7 +513,7 @@ struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest { ZooKeeperRemoveRequest() = default; - ZooKeeperRemoveRequest(const RemoveRequest & base) : RemoveRequest(base) {} + explicit ZooKeeperRemoveRequest(const RemoveRequest & base) : RemoveRequest(base) {} ZooKeeper::OpNum getOpNum() const override { return 2; } void writeImpl(WriteBuffer & out) const override @@ -571,7 +571,7 @@ struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest { ZooKeeperSetRequest() = default; - ZooKeeperSetRequest(const SetRequest & base) : SetRequest(base) {} + explicit ZooKeeperSetRequest(const SetRequest & base) : SetRequest(base) {} ZooKeeper::OpNum getOpNum() const override { return 5; } void writeImpl(WriteBuffer & out) const 
override @@ -614,7 +614,7 @@ struct ZooKeeperListResponse final : ListResponse, ZooKeeperResponse struct ZooKeeperCheckRequest final : CheckRequest, ZooKeeperRequest { ZooKeeperCheckRequest() = default; - ZooKeeperCheckRequest(const CheckRequest & base) : CheckRequest(base) {} + explicit ZooKeeperCheckRequest(const CheckRequest & base) : CheckRequest(base) {} ZooKeeper::OpNum getOpNum() const override { return 13; } void writeImpl(WriteBuffer & out) const override @@ -710,7 +710,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse { - ZooKeeperMultiResponse(const Requests & requests) + explicit ZooKeeperMultiResponse(const Requests & requests) { responses.reserve(requests.size()); diff --git a/dbms/src/Common/ZooKeeper/tests/zkutil_test_async.cpp b/dbms/src/Common/ZooKeeper/tests/zkutil_test_async.cpp index 59c203a4683..17258c529ff 100644 --- a/dbms/src/Common/ZooKeeper/tests/zkutil_test_async.cpp +++ b/dbms/src/Common/ZooKeeper/tests/zkutil_test_async.cpp @@ -24,6 +24,7 @@ try while (true) { std::vector> futures; + futures.reserve(nodes.size()); for (auto & node : nodes) futures.push_back(zookeeper.asyncGet("/tmp/" + node)); diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 6f1c825227c..d78a366c5c4 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -4,9 +4,6 @@ if(OPENSSL_CRYPTO_LIBRARY) target_link_libraries (hashes_test PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) endif() -add_executable (sip_hash sip_hash.cpp) -target_link_libraries (sip_hash PRIVATE clickhouse_common_io) - add_executable (sip_hash_perf sip_hash_perf.cpp) target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io) diff --git a/dbms/src/Common/tests/auto_array.cpp b/dbms/src/Common/tests/auto_array.cpp index 3dc490796fa..9e9a18ffec9 100644 --- a/dbms/src/Common/tests/auto_array.cpp +++ b/dbms/src/Common/tests/auto_array.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -12,6 +13,8 @@ int main(int argc, char ** argv) { + pcg64 rng; + { size_t n = 10; using T = std::string; @@ -63,7 +66,7 @@ int main(int argc, char ** argv) { Arr key(n); for (size_t j = 0; j < n; ++j) - key[j] = DB::toString(rand()); + key[j] = DB::toString(rng()); map[std::move(key)] = "Hello, world! 
" + DB::toString(i); } @@ -107,7 +110,7 @@ int main(int argc, char ** argv) { Arr key(n); for (size_t j = 0; j < n; ++j) - key[j] = DB::toString(rand()); + key[j] = DB::toString(rng()); vec.push_back(std::move(key)); } @@ -152,7 +155,7 @@ int main(int argc, char ** argv) Map::LookupResult it; bool inserted; - map.emplace(rand(), it, inserted); + map.emplace(rng(), it, inserted); if (inserted) { new (&it->getMapped()) Arr(n); diff --git a/dbms/src/Common/tests/cow_columns.cpp b/dbms/src/Common/tests/cow_columns.cpp index 7b629e264e5..b4c3637be5a 100644 --- a/dbms/src/Common/tests/cow_columns.cpp +++ b/dbms/src/Common/tests/cow_columns.cpp @@ -28,7 +28,7 @@ private: friend class COWHelper; int data; - ConcreteColumn(int data_) : data(data_) {} + explicit ConcreteColumn(int data_) : data(data_) {} ConcreteColumn(const ConcreteColumn &) = default; MutableColumnPtr test() const override diff --git a/dbms/src/Common/tests/cow_compositions.cpp b/dbms/src/Common/tests/cow_compositions.cpp index 8d0110a0290..0335693d1bd 100644 --- a/dbms/src/Common/tests/cow_compositions.cpp +++ b/dbms/src/Common/tests/cow_compositions.cpp @@ -30,7 +30,7 @@ private: friend class COWHelper; int data; - ConcreteColumn(int data_) : data(data_) {} + explicit ConcreteColumn(int data_) : data(data_) {} ConcreteColumn(const ConcreteColumn &) = default; public: @@ -45,7 +45,7 @@ private: ConcreteColumn::WrappedPtr wrapped; - ColumnComposition(int data) : wrapped(ConcreteColumn::create(data)) {} + explicit ColumnComposition(int data) : wrapped(ConcreteColumn::create(data)) {} ColumnComposition(const ColumnComposition &) = default; IColumn::MutablePtr deepMutate() const override diff --git a/dbms/src/Common/tests/gtest_sensitive_data_masker.cpp b/dbms/src/Common/tests/gtest_sensitive_data_masker.cpp index 9b9af39f6ea..d5133b2ef95 100644 --- a/dbms/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/dbms/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -63,7 +63,7 @@ TEST(Common, SensitiveDataMasker) #ifndef NDEBUG // simple benchmark auto start = std::chrono::high_resolution_clock::now(); - constexpr unsigned long int iterations = 200000; + static constexpr size_t iterations = 200000; for (int i = 0; i < iterations; ++i) { std::string query2 = "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', 'qwerty123') WHERE ssn='123-45-6789' or " diff --git a/dbms/src/Common/tests/int_hashes_perf.cpp b/dbms/src/Common/tests/int_hashes_perf.cpp index 6792b22dfce..5165919ddaf 100644 --- a/dbms/src/Common/tests/int_hashes_perf.cpp +++ b/dbms/src/Common/tests/int_hashes_perf.cpp @@ -273,8 +273,8 @@ static inline void test(size_t n, const UInt64 * data, const char * name) int main(int argc, char ** argv) { - size_t n = (atoi(argv[1]) + (BUF_SIZE - 1)) / BUF_SIZE * BUF_SIZE; - size_t method = argc <= 2 ? 0 : atoi(argv[2]); + size_t n = (std::stol(argv[1]) + (BUF_SIZE - 1)) / BUF_SIZE * BUF_SIZE; + size_t method = argc <= 2 ? 
0 : std::stol(argv[2]); std::cerr << std::fixed << std::setprecision(2); diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp index 29e4a31bfb3..b5ba0be6420 100644 --- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp +++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp @@ -343,8 +343,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); -// size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); +// size_t m = std::stol(argv[2]); std::cerr << std::fixed << std::setprecision(3); diff --git a/dbms/src/Common/tests/parallel_aggregation.cpp b/dbms/src/Common/tests/parallel_aggregation.cpp index 4533ff3a88e..32085336961 100644 --- a/dbms/src/Common/tests/parallel_aggregation.cpp +++ b/dbms/src/Common/tests/parallel_aggregation.cpp @@ -246,9 +246,9 @@ void aggregate5(Map & local_map, MapSmallLocks & global_map, Source::const_itera int main(int argc, char ** argv) { - size_t n = atoi(argv[1]); - size_t num_threads = atoi(argv[2]); - size_t method = argc <= 3 ? 0 : atoi(argv[3]); + size_t n = std::stol(argv[1]); + size_t num_threads = std::stol(argv[2]); + size_t method = argc <= 3 ? 0 : std::stol(argv[3]); std::cerr << std::fixed << std::setprecision(2); diff --git a/dbms/src/Common/tests/parallel_aggregation2.cpp b/dbms/src/Common/tests/parallel_aggregation2.cpp index 30c7f05222c..fdcb4c74776 100644 --- a/dbms/src/Common/tests/parallel_aggregation2.cpp +++ b/dbms/src/Common/tests/parallel_aggregation2.cpp @@ -285,9 +285,9 @@ struct Merger int main(int argc, char ** argv) { - size_t n = atoi(argv[1]); - size_t num_threads = atoi(argv[2]); - size_t method = argc <= 3 ? 0 : atoi(argv[3]); + size_t n = std::stol(argv[1]); + size_t num_threads = std::stol(argv[2]); + size_t method = argc <= 3 ? 0 : std::stol(argv[3]); std::cerr << std::fixed << std::setprecision(2); diff --git a/dbms/src/Common/tests/radix_sort.cpp b/dbms/src/Common/tests/radix_sort.cpp index 44225d2b218..a7313d05cec 100644 --- a/dbms/src/Common/tests/radix_sort.cpp +++ b/dbms/src/Common/tests/radix_sort.cpp @@ -1,6 +1,7 @@ #if !defined(__APPLE__) && !defined(__FreeBSD__) #include #endif +#include #include #include #include @@ -31,6 +32,8 @@ static void NO_INLINE sort3(Key * data, size_t size) int main(int argc, char ** argv) { + pcg64 rng; + if (argc < 3) { std::cerr << "Usage: program n method\n"; @@ -48,7 +51,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (auto & elem : data) - elem = rand(); + elem = rng(); watch.stop(); double elapsed = watch.elapsedSeconds(); diff --git a/dbms/src/Common/tests/sip_hash.cpp b/dbms/src/Common/tests/sip_hash.cpp deleted file mode 100644 index 046ea0edc15..00000000000 --- a/dbms/src/Common/tests/sip_hash.cpp +++ /dev/null @@ -1,154 +0,0 @@ -#include -#include - -#include -#include - -#include - -/// Adapted version https://www.131002.net/siphash/siphash24.c - -/* - SipHash-2-4 output with - k = 00 01 02 ... - and - in = (empty string) - in = 00 (1 byte) - in = 00 01 (2 bytes) - in = 00 01 02 (3 bytes) - ... - in = 00 01 02 ... 
3e (63 bytes) -*/ -uint8_t vectors[64][8] = -{ - { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, - { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, - { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, - { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, - { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, - { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, - { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, - { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, - { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, - { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, - { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, - { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, - { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, - { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, - { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, - { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, - { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, - { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, - { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, - { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, - { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, - { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, - { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, - { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, - { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, - { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, - { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, - { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, - { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, - { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, - { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, - { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, - { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, - { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, - { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, - { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, - { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, - { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, - { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, - { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, - { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, - { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, - { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, - { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, - { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, - { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, - { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, - { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, - { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, - { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, - { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, - { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, - { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, - { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, - { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, - { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, - { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, - { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, - { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, - { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, - { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, - { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, - { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, - { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 
0x8a, 0x95, } -}; - - -static int test_vectors() -{ -#define MAXLEN 64 - char in[MAXLEN]; - - union - { - char out[16]; - uint64_t out64[2]; - }; - - union - { - char k[16]; - uint64_t k64[2]; - }; - - int i; - int ok = 1; - - for (i = 0; i < 16; ++i) - k[i] = i; - - for (i = 0; i < MAXLEN; ++i) - { - in[i] = i; - - size_t part = i == 0 ? 0 : (rand() % i); - - SipHash hash(k64[0], k64[1]); - - hash.update(in, part); - hash.update(in + part, i - part); - - hash.get128(out); - - uint64_t test_vector; - memcpy(&test_vector, vectors[i], 8); - - if ((out64[0] ^ out64[1]) != test_vector) - { - std::cerr << "test vector failed for " << i << " bytes" << std::endl; - ok = 0; - } - } - - return ok; -} - - -int main(int, char **) -{ - size_t n = 100000; - - size_t i = 0; - for (; i < n; ++i) - if (!test_vectors()) - break; - - if (i == n) - std::cerr << "test vectors ok" << std::endl; - - return 0; -} diff --git a/dbms/src/Compression/CompressionCodecMultiple.cpp b/dbms/src/Compression/CompressionCodecMultiple.cpp index 4f5d45606c2..0d5189381cd 100644 --- a/dbms/src/Compression/CompressionCodecMultiple.cpp +++ b/dbms/src/Compression/CompressionCodecMultiple.cpp @@ -98,7 +98,7 @@ void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 sour /// Insert all data into compressed buf source_size -= (compression_methods_size + 1); - for (long idx = compression_methods_size - 1; idx >= 0; --idx) + for (int idx = compression_methods_size - 1; idx >= 0; --idx) { UInt8 compression_method = source[idx + 1]; const auto codec = CompressionCodecFactory::instance().get(compression_method); diff --git a/dbms/src/Compression/tests/gtest_compressionCodec.cpp b/dbms/src/Compression/tests/gtest_compressionCodec.cpp index eb3c98b5ba1..df9471718d1 100644 --- a/dbms/src/Compression/tests/gtest_compressionCodec.cpp +++ b/dbms/src/Compression/tests/gtest_compressionCodec.cpp @@ -95,7 +95,7 @@ std::string bin(const T & value, size_t bits = sizeof(T)*8) static const uint8_t MAX_BITS = sizeof(T)*8; assert(bits <= MAX_BITS); - return std::bitset(static_cast(value)) + return std::bitset(static_cast(value)) .to_string().substr(MAX_BITS - bits, bits); } @@ -182,7 +182,7 @@ public: return *this; } - operator bool() const + explicit operator bool() const { return ItemsLeft() > 0; } @@ -706,9 +706,9 @@ typename std::conditional_t, std::uniform_real_distr template -struct MonotonicGenerator +struct MonotonicGenerator // NOLINT { - MonotonicGenerator(T stride_ = 1, T max_step = 10) + explicit MonotonicGenerator(T stride_ = 1, T max_step = 10) // NOLINT : prev_value(0), stride(stride_), random_engine(0), @@ -732,7 +732,7 @@ private: template struct RandomGenerator { - RandomGenerator(T seed = 0, T value_min = std::numeric_limits::min(), T value_max = std::numeric_limits::max()) + explicit RandomGenerator(T seed = 0, T value_min = std::numeric_limits::min(), T value_max = std::numeric_limits::max()) : random_engine(seed), distribution(value_min, value_max) { diff --git a/dbms/src/Core/tests/string_pool.cpp b/dbms/src/Core/tests/string_pool.cpp index a2ee0ec3ea5..358381c4c0c 100644 --- a/dbms/src/Core/tests/string_pool.cpp +++ b/dbms/src/Core/tests/string_pool.cpp @@ -27,7 +27,7 @@ int main(int argc, char ** argv) std::ofstream devnull("/dev/null"); DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); - size_t n = atoi(argv[1]); + size_t n = std::stol(argv[1]); size_t elems_show = 1; using Vec = std::vector; diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp 
b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 10b0d0a7fd1..d2df3dbc496 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -54,7 +54,7 @@ static void mixNumberColumns( const ColumnPtr & col_defaults, const BlockMissingValues::RowsBitMask & defaults_mask) { - auto call = [&](const auto & types) -> bool + auto call = [&](const auto & types) { using Types = std::decay_t; using DataType = typename Types::LeftType; diff --git a/dbms/src/DataStreams/tests/finish_sorting_stream.cpp b/dbms/src/DataStreams/tests/finish_sorting_stream.cpp index 861965e1e1c..cfc9ba217b3 100644 --- a/dbms/src/DataStreams/tests/finish_sorting_stream.cpp +++ b/dbms/src/DataStreams/tests/finish_sorting_stream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -11,6 +12,7 @@ #include + using namespace DB; namespace DB @@ -24,12 +26,12 @@ namespace DB int main(int argc, char ** argv) { - srand(123456); + pcg64 rng; try { - size_t m = argc >= 2 ? atoi(argv[1]) : 2; - size_t n = argc >= 3 ? atoi(argv[2]) : 10; + size_t m = argc >= 2 ? std::stol(argv[1]) : 2; + size_t n = argc >= 3 ? std::stol(argv[2]) : 10; Blocks blocks; for (size_t t = 0; t < m; ++t) @@ -46,7 +48,7 @@ int main(int argc, char ** argv) vec.resize(n); for (size_t j = 0; j < n; ++j) - vec[j] = rand() % 10; + vec[j] = rng() % 10; column.column = std::move(col); block.insert(column); diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 417c988e5b9..ebec1571e9e 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -77,7 +77,7 @@ struct KeysSerializationVersion throw Exception("Invalid version for DataTypeLowCardinality key column.", ErrorCodes::LOGICAL_ERROR); } - KeysSerializationVersion(UInt64 version) : value(static_cast(version)) { checkVersion(version); } + explicit KeysSerializationVersion(UInt64 version) : value(static_cast(version)) { checkVersion(version); } }; /// Version is stored at the start of each granule. It's used to store indexes type and flags. 
diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index cd5dde3177c..9673580bf30 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -37,7 +37,7 @@ public: const Context & context, const String & table_name) const override; - DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; bool empty(const Context & context) const override; diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index 8d1f20c068d..da270818d17 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -60,7 +60,7 @@ public: bool empty(const Context & context) const override; - DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; void attachTable(const String & table_name, const StoragePtr & table) override; diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index f0112481661..5a7886cd6e2 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -30,7 +30,7 @@ public: bool empty(const Context & context) const override; - DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; ASTPtr getCreateDatabaseQuery(const Context & /*context*/) const override; diff --git a/dbms/src/Databases/DatabaseWithDictionaries.h b/dbms/src/Databases/DatabaseWithDictionaries.h index 5ec37bdbb1a..e47ab6206c5 100644 --- a/dbms/src/Databases/DatabaseWithDictionaries.h +++ b/dbms/src/Databases/DatabaseWithDictionaries.h @@ -20,9 +20,9 @@ public: StoragePtr tryGetTable(const Context & context, const String & table_name) const override; - DatabaseTablesIteratorPtr getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; + DatabaseTablesIteratorPtr getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name) override; - DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; + DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name) override; bool isDictionaryExist(const Context & context, const String & dictionary_name) const override; diff --git a/dbms/src/Databases/DatabasesCommon.h b/dbms/src/Databases/DatabasesCommon.h index 1cefb8949bc..3bf7460da01 100644 --- a/dbms/src/Databases/DatabasesCommon.h +++ b/dbms/src/Databases/DatabasesCommon.h @@ -33,7 +33,7 @@ public: StoragePtr detachTable(const String & table_name) override; - DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; void 
shutdown() override; diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 8bd6ff06054..9b331269b23 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -50,9 +50,9 @@ namespace DB using ValueType = ExternalResultDescription::ValueType; template - inline void insert(IColumn & column, const String & stringValue) + inline void insert(IColumn & column, const String & string_value) { - assert_cast &>(column).insertValue(parse(stringValue)); + assert_cast &>(column).insertValue(parse(string_value)); } void insertValue(IColumn & column, const ValueType type, const Poco::Redis::BulkString & bulk_string) @@ -60,50 +60,50 @@ namespace DB if (bulk_string.isNull()) throw Exception{"Type mismatch, expected not Null String", ErrorCodes::TYPE_MISMATCH}; - String stringValue = bulk_string.value(); + const String & string_value = bulk_string.value(); switch (type) { case ValueType::vtUInt8: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtUInt16: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtUInt32: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtUInt64: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtInt8: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtInt16: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtInt32: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtInt64: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtFloat32: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtFloat64: - insert(column, stringValue); + insert(column, string_value); break; case ValueType::vtString: - assert_cast(column).insert(parse(stringValue)); + assert_cast(column).insert(parse(string_value)); break; case ValueType::vtDate: - assert_cast(column).insertValue(parse(stringValue).getDayNum()); + assert_cast(column).insertValue(parse(string_value).getDayNum()); break; case ValueType::vtDateTime: - assert_cast(column).insertValue(static_cast(parse(stringValue))); + assert_cast(column).insertValue(static_cast(parse(string_value))); break; case ValueType::vtUUID: - assert_cast(column).insertValue(parse(stringValue)); + assert_cast(column).insertValue(parse(string_value)); break; } } diff --git a/dbms/src/Disks/DiskLocal.h b/dbms/src/Disks/DiskLocal.h index 0bca5dc72d4..77c86fa1f3e 100644 --- a/dbms/src/Disks/DiskLocal.h +++ b/dbms/src/Disks/DiskLocal.h @@ -71,17 +71,17 @@ public: std::unique_ptr readFile( const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - size_t estimated_size = 0, - size_t aio_threshold = 0, - size_t mmap_threshold = 0) const override; + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold) const override; std::unique_ptr writeFile( const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - WriteMode mode = WriteMode::Rewrite, - size_t estimated_size = 0, - size_t aio_threshold = 0) override; + size_t buf_size, + WriteMode mode, + size_t estimated_size, + size_t aio_threshold) override; void remove(const String & path) override; diff --git a/dbms/src/Disks/DiskMemory.h b/dbms/src/Disks/DiskMemory.h index f67a361a948..8ddb5307c41 100644 --- a/dbms/src/Disks/DiskMemory.h 
+++ b/dbms/src/Disks/DiskMemory.h @@ -64,17 +64,17 @@ public: std::unique_ptr readFile( const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - size_t estimated_size = 0, - size_t aio_threshold = 0, - size_t mmap_threshold = 0) const override; + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold) const override; std::unique_ptr writeFile( const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - WriteMode mode = WriteMode::Rewrite, - size_t estimated_size = 0, - size_t aio_threshold = 0) override; + size_t buf_size, + WriteMode mode, + size_t estimated_size, + size_t aio_threshold) override; void remove(const String & path) override; diff --git a/dbms/src/Disks/DiskS3.cpp b/dbms/src/Disks/DiskS3.cpp index 6b98520637a..d3712631a58 100644 --- a/dbms/src/Disks/DiskS3.cpp +++ b/dbms/src/Disks/DiskS3.cpp @@ -648,24 +648,29 @@ DiskS3Reservation::~DiskS3Reservation() } } -inline void checkWriteAccess(std::shared_ptr & disk) +namespace { - auto file = disk->writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + +void checkWriteAccess(IDisk & disk) +{ + auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); file->write("test", 4); } -inline void checkReadAccess(const String & disk_name, std::shared_ptr & disk) +void checkReadAccess(const String & disk_name, IDisk & disk) { - auto file = disk->readFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE); + auto file = disk.readFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE); String buf(4, '0'); file->readStrict(buf.data(), 4); if (buf != "test") throw Exception("No read access to S3 bucket in disk " + disk_name, ErrorCodes::PATH_ACCESS_DENIED); } -inline void checkRemoveAccess(std::shared_ptr & disk) +void checkRemoveAccess(IDisk & disk) { - disk->remove("test_acl"); + disk.remove("test_acl"); +} + } void registerDiskS3(DiskFactory & factory) @@ -692,9 +697,9 @@ void registerDiskS3(DiskFactory & factory) = std::make_shared(name, client, uri.bucket, uri.key, metadata_path, context.getSettingsRef().s3_min_upload_part_size); /// This code is used only to check access to the corresponding disk. 
- checkWriteAccess(s3disk); - checkReadAccess(name, s3disk); - checkRemoveAccess(s3disk); + checkWriteAccess(*s3disk); + checkReadAccess(name, *s3disk); + checkRemoveAccess(*s3disk); return s3disk; }; diff --git a/dbms/src/Disks/DiskS3.h b/dbms/src/Disks/DiskS3.h index d2950940063..10c7f015f77 100644 --- a/dbms/src/Disks/DiskS3.h +++ b/dbms/src/Disks/DiskS3.h @@ -71,17 +71,17 @@ public: std::unique_ptr readFile( const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - size_t estimated_size = 0, - size_t aio_threshold = 0, - size_t mmap_threshold = 0) const override; + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold) const override; std::unique_ptr writeFile( const String & path, - size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - WriteMode mode = WriteMode::Rewrite, - size_t estimated_size = 0, - size_t aio_threshold = 0) override; + size_t buf_size, + WriteMode mode, + size_t estimated_size, + size_t aio_threshold) override; void remove(const String & path) override; diff --git a/dbms/src/Formats/ProtobufSchemas.cpp b/dbms/src/Formats/ProtobufSchemas.cpp index 11afbffe694..f4973263bc8 100644 --- a/dbms/src/Formats/ProtobufSchemas.cpp +++ b/dbms/src/Formats/ProtobufSchemas.cpp @@ -24,7 +24,7 @@ ProtobufSchemas & ProtobufSchemas::instance() class ProtobufSchemas::ImporterWithSourceTree : public google::protobuf::compiler::MultiFileErrorCollector { public: - ImporterWithSourceTree(const String & schema_directory) : importer(&disk_source_tree, this) + explicit ImporterWithSourceTree(const String & schema_directory) : importer(&disk_source_tree, this) { disk_source_tree.MapPath("", schema_directory); } diff --git a/dbms/src/Functions/CRC.cpp b/dbms/src/Functions/CRC.cpp index 22814b2c26c..b4cb064dd8a 100644 --- a/dbms/src/Functions/CRC.cpp +++ b/dbms/src/Functions/CRC.cpp @@ -11,7 +11,7 @@ template struct CRCBase { T tab[256]; - CRCBase(T polynomial) + explicit CRCBase(T polynomial) { for (size_t i = 0; i < 256; ++i) { diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index fde1774695c..2210be65b32 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -167,7 +167,7 @@ void validateFunctionArgumentTypes(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto joinArgumentTypes = [](const auto & args, const String sep = ", ") -> String + auto joinArgumentTypes = [](const auto & args, const String sep = ", ") { String result; for (const auto & a : args) diff --git a/dbms/src/Functions/FunctionsLogical.cpp b/dbms/src/Functions/FunctionsLogical.cpp index 8b8f03a0c89..b5fe1ff59e1 100644 --- a/dbms/src/Functions/FunctionsLogical.cpp +++ b/dbms/src/Functions/FunctionsLogical.cpp @@ -152,7 +152,7 @@ class AssociativeApplierImpl public: /// Remembers the last N columns from `in`. - AssociativeApplierImpl(const UInt8ColumnPtrs & in) + explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in) : vec(in[in.size() - N]->getData()), next(in) {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. 
@@ -176,7 +176,7 @@ class AssociativeApplierImpl using ResultValueType = typename Op::ResultType; public: - AssociativeApplierImpl(const UInt8ColumnPtrs & in) + explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in) : vec(in[in.size() - 1]->getData()) {} inline ResultValueType apply(const size_t i) const { return vec[i]; } @@ -239,7 +239,7 @@ class AssociativeGenericApplierImpl public: /// Remembers the last N columns from `in`. - AssociativeGenericApplierImpl(const ColumnRawPtrs & in) + explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in) : val_getter{ValueGetterBuilder::build(in[in.size() - N])}, next{in} {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. @@ -265,7 +265,7 @@ class AssociativeGenericApplierImpl public: /// Remembers the last N columns from `in`. - AssociativeGenericApplierImpl(const ColumnRawPtrs & in) + explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in) : val_getter{ValueGetterBuilder::build(in[in.size() - 1])} {} inline ResultValueType apply(const size_t i) const { return val_getter(i); } diff --git a/dbms/src/Functions/FunctionsStringRegex.cpp b/dbms/src/Functions/FunctionsStringRegex.cpp index 186e58f83fa..4f89c3ae305 100644 --- a/dbms/src/Functions/FunctionsStringRegex.cpp +++ b/dbms/src/Functions/FunctionsStringRegex.cpp @@ -306,8 +306,8 @@ struct MultiMatchAnyImpl MultiRegexps::ScratchPtr smart_scratch(scratch); auto on_match = []([[maybe_unused]] unsigned int id, - unsigned long long /* from */, - unsigned long long /* to */, + unsigned long long /* from */, // NOLINT + unsigned long long /* to */, // NOLINT unsigned int /* flags */, void * context) -> int { @@ -407,8 +407,8 @@ struct MultiMatchAllIndicesImpl MultiRegexps::ScratchPtr smart_scratch(scratch); auto on_match = [](unsigned int id, - unsigned long long /* from */, - unsigned long long /* to */, + unsigned long long /* from */, // NOLINT + unsigned long long /* to */, // NOLINT unsigned int /* flags */, void * context) -> int { diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index bbd942a9b0d..382c02e7008 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -319,9 +319,9 @@ UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out) } UInt64 items = 0; - for (auto lon = args.longitude_min; lon < args.longitude_max; lon += args.longitude_step) + for (auto lon = args.longitude_min; lon < args.longitude_max; lon += args.longitude_step) // NOLINT { - for (auto lat = args.latitude_min; lat < args.latitude_max; lat += args.latitude_step) + for (auto lat = args.latitude_min; lat < args.latitude_max; lat += args.latitude_step) // NOLINT { assert(items <= args.items_count); diff --git a/dbms/src/Functions/array/array.cpp b/dbms/src/Functions/array/array.cpp index d517ced8203..aa4b945055a 100644 --- a/dbms/src/Functions/array/array.cpp +++ b/dbms/src/Functions/array/array.cpp @@ -19,7 +19,7 @@ public: return std::make_shared(context); } - FunctionArray(const Context & context_) + explicit FunctionArray(const Context & context_) : context(context_) { } diff --git a/dbms/src/Functions/array/arrayConcat.cpp b/dbms/src/Functions/array/arrayConcat.cpp index 30da20c7766..f96584e3f54 100644 --- a/dbms/src/Functions/array/arrayConcat.cpp +++ b/dbms/src/Functions/array/arrayConcat.cpp @@ -27,7 +27,7 @@ class FunctionArrayConcat : public IFunction public: static constexpr auto name = "arrayConcat"; static FunctionPtr create(const Context & context) { return 
std::make_shared(context); } - FunctionArrayConcat(const Context & context_) : context(context_) {} + explicit FunctionArrayConcat(const Context & context_) : context(context_) {} String getName() const override { return name; } diff --git a/dbms/src/Functions/array/arrayElement.cpp b/dbms/src/Functions/array/arrayElement.cpp index 7c610017b29..6113a16ddfa 100644 --- a/dbms/src/Functions/array/arrayElement.cpp +++ b/dbms/src/Functions/array/arrayElement.cpp @@ -95,7 +95,7 @@ namespace ArrayImpl class NullMapBuilder { public: - operator bool() const { return src_null_map; } + explicit operator bool() const { return src_null_map; } bool operator!() const { return !src_null_map; } void initSource(const UInt8 * src_null_map_) diff --git a/dbms/src/Functions/array/arrayIntersect.cpp b/dbms/src/Functions/array/arrayIntersect.cpp index 4673f4a7a05..ffeb6e99222 100644 --- a/dbms/src/Functions/array/arrayIntersect.cpp +++ b/dbms/src/Functions/array/arrayIntersect.cpp @@ -39,7 +39,7 @@ class FunctionArrayIntersect : public IFunction public: static constexpr auto name = "arrayIntersect"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionArrayIntersect(const Context & context_) : context(context_) {} + explicit FunctionArrayIntersect(const Context & context_) : context(context_) {} String getName() const override { return name; } diff --git a/dbms/src/Functions/array/arrayPushBack.cpp b/dbms/src/Functions/array/arrayPushBack.cpp index a9c4ed88a7a..74d9596dcd2 100644 --- a/dbms/src/Functions/array/arrayPushBack.cpp +++ b/dbms/src/Functions/array/arrayPushBack.cpp @@ -10,7 +10,7 @@ class FunctionArrayPushBack : public FunctionArrayPush public: static constexpr auto name = "arrayPushBack"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionArrayPushBack(const Context & context_) : FunctionArrayPush(context_, false, name) {} + explicit FunctionArrayPushBack(const Context & context_) : FunctionArrayPush(context_, false, name) {} }; void registerFunctionArrayPushBack(FunctionFactory & factory) diff --git a/dbms/src/Functions/array/arrayPushFront.cpp b/dbms/src/Functions/array/arrayPushFront.cpp index e0cc56c8ae2..ab8535b6672 100644 --- a/dbms/src/Functions/array/arrayPushFront.cpp +++ b/dbms/src/Functions/array/arrayPushFront.cpp @@ -11,7 +11,7 @@ class FunctionArrayPushFront : public FunctionArrayPush public: static constexpr auto name = "arrayPushFront"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionArrayPushFront(const Context & context_) : FunctionArrayPush(context_, true, name) {} + explicit FunctionArrayPushFront(const Context & context_) : FunctionArrayPush(context_, true, name) {} }; diff --git a/dbms/src/Functions/array/arrayResize.cpp b/dbms/src/Functions/array/arrayResize.cpp index 903a39aa4ab..9e34e7ccd92 100644 --- a/dbms/src/Functions/array/arrayResize.cpp +++ b/dbms/src/Functions/array/arrayResize.cpp @@ -26,7 +26,7 @@ class FunctionArrayResize : public IFunction public: static constexpr auto name = "arrayResize"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionArrayResize(const Context & context_) : context(context_) {} + explicit FunctionArrayResize(const Context & context_) : context(context_) {} String getName() const override { return name; } diff --git a/dbms/src/Functions/array/arraySort.cpp b/dbms/src/Functions/array/arraySort.cpp index 17a711e8902..478c7e52614 100644 --- 
a/dbms/src/Functions/array/arraySort.cpp +++ b/dbms/src/Functions/array/arraySort.cpp @@ -23,7 +23,7 @@ struct ArraySortImpl { const IColumn & column; - Less(const IColumn & column_) : column(column_) {} + explicit Less(const IColumn & column_) : column(column_) {} bool operator()(size_t lhs, size_t rhs) const { diff --git a/dbms/src/Functions/array/hasAll.cpp b/dbms/src/Functions/array/hasAll.cpp index 6ae1640e382..8d833adb5f5 100644 --- a/dbms/src/Functions/array/hasAll.cpp +++ b/dbms/src/Functions/array/hasAll.cpp @@ -10,7 +10,7 @@ class FunctionArrayHasAll : public FunctionArrayHasAllAny public: static constexpr auto name = "hasAll"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionArrayHasAll(const Context & context_) : FunctionArrayHasAllAny(context_, true, name) {} + explicit FunctionArrayHasAll(const Context & context_) : FunctionArrayHasAllAny(context_, true, name) {} }; void registerFunctionHasAll(FunctionFactory & factory) diff --git a/dbms/src/Functions/array/hasAny.cpp b/dbms/src/Functions/array/hasAny.cpp index 756e5311b50..84a3a736364 100644 --- a/dbms/src/Functions/array/hasAny.cpp +++ b/dbms/src/Functions/array/hasAny.cpp @@ -10,7 +10,7 @@ class FunctionArrayHasAny : public FunctionArrayHasAllAny public: static constexpr auto name = "hasAny"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionArrayHasAny(const Context & context_) : FunctionArrayHasAllAny(context_, false, name) {} + explicit FunctionArrayHasAny(const Context & context_) : FunctionArrayHasAllAny(context_, false, name) {} }; void registerFunctionHasAny(FunctionFactory & factory) diff --git a/dbms/src/Functions/array/range.cpp b/dbms/src/Functions/array/range.cpp index b04dcce7519..283eb760fcf 100644 --- a/dbms/src/Functions/array/range.cpp +++ b/dbms/src/Functions/array/range.cpp @@ -28,7 +28,7 @@ public: static constexpr auto name = "range"; static constexpr size_t max_elements = 100'000'000; static FunctionPtr create(const Context & context_) { return std::make_shared(context_); } - FunctionRange(const Context & context_) : context(context_) {} + explicit FunctionRange(const Context & context_) : context(context_) {} private: const Context & context; diff --git a/dbms/src/Functions/bitCount.cpp b/dbms/src/Functions/bitCount.cpp index 17805254c02..73df2c680da 100644 --- a/dbms/src/Functions/bitCount.cpp +++ b/dbms/src/Functions/bitCount.cpp @@ -26,7 +26,7 @@ struct BitCountImpl if constexpr (std::is_same_v) return __builtin_popcount(static_cast(a)); else - return __builtin_popcountll(ext::bit_cast(a)); + return __builtin_popcountll(ext::bit_cast(a)); } #if USE_EMBEDDED_COMPILER diff --git a/dbms/src/Functions/caseWithExpression.cpp b/dbms/src/Functions/caseWithExpression.cpp index e1e124fd1ef..dc48536b4a7 100644 --- a/dbms/src/Functions/caseWithExpression.cpp +++ b/dbms/src/Functions/caseWithExpression.cpp @@ -21,7 +21,7 @@ public: static FunctionPtr create(const Context & context_) { return std::make_shared(context_); } public: - FunctionCaseWithExpression(const Context & context_) : context(context_) {} + explicit FunctionCaseWithExpression(const Context & context_) : context(context_) {} bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } String getName() const override { return name; } diff --git a/dbms/src/Functions/coalesce.cpp b/dbms/src/Functions/coalesce.cpp index 6560aa88cc3..9d5d4df4a7f 100644 --- a/dbms/src/Functions/coalesce.cpp +++ 
b/dbms/src/Functions/coalesce.cpp @@ -26,7 +26,7 @@ public: return std::make_shared(context); } - FunctionCoalesce(const Context & context_) : context(context_) {} + explicit FunctionCoalesce(const Context & context_) : context(context_) {} std::string getName() const override { diff --git a/dbms/src/Functions/concat.cpp b/dbms/src/Functions/concat.cpp index 224dc32ca33..fd9448a5327 100644 --- a/dbms/src/Functions/concat.cpp +++ b/dbms/src/Functions/concat.cpp @@ -32,7 +32,7 @@ class ConcatImpl : public IFunction { public: static constexpr auto name = Name::name; - ConcatImpl(const Context & context_) : context(context_) {} + explicit ConcatImpl(const Context & context_) : context(context_) {} static FunctionPtr create(const Context & context) { return std::make_shared(context); } String getName() const override { return name; } diff --git a/dbms/src/Functions/evalMLMethod.cpp b/dbms/src/Functions/evalMLMethod.cpp index e9fe3c087c7..4f6ca87a17b 100644 --- a/dbms/src/Functions/evalMLMethod.cpp +++ b/dbms/src/Functions/evalMLMethod.cpp @@ -35,7 +35,7 @@ public: { return std::make_shared(context); } - FunctionEvalMLMethod(const Context & context_) : context(context_) + explicit FunctionEvalMLMethod(const Context & context_) : context(context_) {} String getName() const override diff --git a/dbms/src/Functions/formatDateTime.cpp b/dbms/src/Functions/formatDateTime.cpp index 5a3ee7b1c48..5273348d00e 100644 --- a/dbms/src/Functions/formatDateTime.cpp +++ b/dbms/src/Functions/formatDateTime.cpp @@ -93,7 +93,7 @@ private: Func func; size_t shift; - Action(Func func_, size_t shift_ = 0) : func(func_), shift(shift_) {} + explicit Action(Func func_, size_t shift_ = 0) : func(func_), shift(shift_) {} void perform(char *& target, Time source, const DateLUTImpl & timezone) { diff --git a/dbms/src/Functions/getMacro.cpp b/dbms/src/Functions/getMacro.cpp index 02dec99d9d8..ca0c135d38a 100644 --- a/dbms/src/Functions/getMacro.cpp +++ b/dbms/src/Functions/getMacro.cpp @@ -33,7 +33,7 @@ public: return std::make_shared(context.getMacros()); } - FunctionGetMacro(MultiVersion::Version macros_) : macros(std::move(macros_)) {} + explicit FunctionGetMacro(MultiVersion::Version macros_) : macros(std::move(macros_)) {} String getName() const override { diff --git a/dbms/src/Functions/getScalar.cpp b/dbms/src/Functions/getScalar.cpp index d6c0d79557c..a6e9d00148e 100644 --- a/dbms/src/Functions/getScalar.cpp +++ b/dbms/src/Functions/getScalar.cpp @@ -27,7 +27,7 @@ public: return std::make_shared(context); } - FunctionGetScalar(const Context & context_) : context(context_) {} + explicit FunctionGetScalar(const Context & context_) : context(context_) {} String getName() const override { diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 220f1505bcd..dfafbe36471 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -173,7 +173,7 @@ class FunctionIf : public FunctionIfBase public: static constexpr auto name = "if"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionIf(const Context & context_) : context(context_) {} + explicit FunctionIf(const Context & context_) : context(context_) {} private: template diff --git a/dbms/src/Functions/ifNotFinite.cpp b/dbms/src/Functions/ifNotFinite.cpp index 605eeddf515..184c08877b9 100644 --- a/dbms/src/Functions/ifNotFinite.cpp +++ b/dbms/src/Functions/ifNotFinite.cpp @@ -15,7 +15,7 @@ class FunctionIfNotFinite : public IFunction public: static constexpr auto name = "ifNotFinite"; - 
FunctionIfNotFinite(const Context & context_) : context(context_) {} + explicit FunctionIfNotFinite(const Context & context_) : context(context_) {} static FunctionPtr create(const Context & context) { diff --git a/dbms/src/Functions/ifNull.cpp b/dbms/src/Functions/ifNull.cpp index 05e9ded387e..1be8916d795 100644 --- a/dbms/src/Functions/ifNull.cpp +++ b/dbms/src/Functions/ifNull.cpp @@ -19,7 +19,7 @@ class FunctionIfNull : public IFunction public: static constexpr auto name = "ifNull"; - FunctionIfNull(const Context & context_) : context(context_) {} + explicit FunctionIfNull(const Context & context_) : context(context_) {} static FunctionPtr create(const Context & context) { diff --git a/dbms/src/Functions/multiIf.cpp b/dbms/src/Functions/multiIf.cpp index 68609af9102..2340f7826c7 100644 --- a/dbms/src/Functions/multiIf.cpp +++ b/dbms/src/Functions/multiIf.cpp @@ -34,7 +34,7 @@ class FunctionMultiIf final : public FunctionIfBase public: static constexpr auto name = "multiIf"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionMultiIf(const Context & context_) : context(context_) {} + explicit FunctionMultiIf(const Context & context_) : context(context_) {} public: String getName() const override { return name; } diff --git a/dbms/src/Functions/neighbor.cpp b/dbms/src/Functions/neighbor.cpp index 1080507ced5..dc83aeb0d31 100644 --- a/dbms/src/Functions/neighbor.cpp +++ b/dbms/src/Functions/neighbor.cpp @@ -29,7 +29,7 @@ public: static constexpr auto name = "neighbor"; static FunctionPtr create(const Context & context) { return std::make_shared(context); } - FunctionNeighbor(const Context & context_) : context(context_) {} + explicit FunctionNeighbor(const Context & context_) : context(context_) {} /// Get the name of the function. 
String getName() const override { return name; } diff --git a/dbms/src/Functions/nullIf.cpp b/dbms/src/Functions/nullIf.cpp index 12b9124863e..34655ebece6 100644 --- a/dbms/src/Functions/nullIf.cpp +++ b/dbms/src/Functions/nullIf.cpp @@ -25,7 +25,7 @@ public: return std::make_shared(context); } - FunctionNullIf(const Context & context_) : context(context_) {} + explicit FunctionNullIf(const Context & context_) : context(context_) {} std::string getName() const override { diff --git a/dbms/src/IO/AIO.cpp b/dbms/src/IO/AIO.cpp index ed22b263a94..33fb79fcf95 100644 --- a/dbms/src/IO/AIO.cpp +++ b/dbms/src/IO/AIO.cpp @@ -30,12 +30,12 @@ int io_destroy(aio_context_t ctx) return syscall(__NR_io_destroy, ctx); } -int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]) +int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]) // NOLINT { return syscall(__NR_io_submit, ctx, nr, iocbpp); } -int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout) +int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout) // NOLINT { return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout); } diff --git a/dbms/src/IO/ReadBufferFromHDFS.cpp b/dbms/src/IO/ReadBufferFromHDFS.cpp index 42419be3117..6d40f8326c2 100644 --- a/dbms/src/IO/ReadBufferFromHDFS.cpp +++ b/dbms/src/IO/ReadBufferFromHDFS.cpp @@ -22,7 +22,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl HDFSBuilderPtr builder; HDFSFSPtr fs; - ReadBufferFromHDFSImpl(const std::string & hdfs_name_) + explicit ReadBufferFromHDFSImpl(const std::string & hdfs_name_) : hdfs_uri(hdfs_name_) , builder(createHDFSBuilder(hdfs_uri)) , fs(createHDFSFS(builder.get())) diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp index eba724f2193..7c3c99ecd7d 100644 --- a/dbms/src/IO/ReadHelpers.cpp +++ b/dbms/src/IO/ReadHelpers.cpp @@ -752,9 +752,9 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) UInt16 year = 0; if (!append_digit(year) - || !append_digit(year) - || !append_digit(year) - || !append_digit(year)) + || !append_digit(year) // NOLINT + || !append_digit(year) // NOLINT + || !append_digit(year)) // NOLINT return error(); if (!ignore_delimiter()) diff --git a/dbms/src/IO/S3Common.cpp b/dbms/src/IO/S3Common.cpp index 700901ebc02..e169eff7846 100644 --- a/dbms/src/IO/S3Common.cpp +++ b/dbms/src/IO/S3Common.cpp @@ -36,7 +36,7 @@ public: Aws::Utils::Logging::LogLevel GetLogLevel() const final { return Aws::Utils::Logging::LogLevel::Trace; } - void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) final + void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) 
final // NOLINT { auto & [level, prio] = convertLogLevel(log_level); LOG_SIMPLE(log, std::string(tag) + ": " + format_str, level, prio); diff --git a/dbms/src/IO/WriteBufferFromHDFS.cpp b/dbms/src/IO/WriteBufferFromHDFS.cpp index 9733d761ee4..0793a966559 100644 --- a/dbms/src/IO/WriteBufferFromHDFS.cpp +++ b/dbms/src/IO/WriteBufferFromHDFS.cpp @@ -26,7 +26,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl HDFSBuilderPtr builder; HDFSFSPtr fs; - WriteBufferFromHDFSImpl(const std::string & hdfs_name_) + explicit WriteBufferFromHDFSImpl(const std::string & hdfs_name_) : hdfs_uri(hdfs_name_) , builder(createHDFSBuilder(hdfs_uri)) , fs(createHDFSFS(builder.get())) diff --git a/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp b/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp index 22cfffdda05..5f1c1c0b945 100644 --- a/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp +++ b/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp @@ -5,9 +5,11 @@ #include #include #include +#include #include #include + namespace { std::string createTmpFileForEOFtest() @@ -21,7 +23,7 @@ std::string createTmpFileForEOFtest() { /// We have no tmp in docker /// So we have to use root - std::string almost_rand_dir = std::string{"/"} + std::to_string(rand()) + "foo"; + std::string almost_rand_dir = std::string{"/"} + std::to_string(randomSeed()) + "foo"; return almost_rand_dir; } diff --git a/dbms/src/IO/tests/gtest_bit_io.cpp b/dbms/src/IO/tests/gtest_bit_io.cpp index 5291dddd25e..98539ea85a3 100644 --- a/dbms/src/IO/tests/gtest_bit_io.cpp +++ b/dbms/src/IO/tests/gtest_bit_io.cpp @@ -32,7 +32,7 @@ std::string bin(const T & value, size_t bits = sizeof(T) * 8) static const uint8_t MAX_BITS = sizeof(T)*8; assert(bits <= MAX_BITS); - return std::bitset(static_cast(value)) + return std::bitset(static_cast(value)) .to_string().substr(MAX_BITS - bits, bits); } @@ -112,7 +112,7 @@ struct TestCaseParameter std::vector> bits_and_vals; std::string expected_buffer_binary; - TestCaseParameter(std::vector> vals, std::string binary = std::string{}) + TestCaseParameter(std::vector> vals, std::string binary = std::string{}) // NOLINT : bits_and_vals(std::move(vals)), expected_buffer_binary(binary) {} diff --git a/dbms/src/IO/tests/hashing_read_buffer.cpp b/dbms/src/IO/tests/hashing_read_buffer.cpp index cb6108d15d8..be31d5b05d3 100644 --- a/dbms/src/IO/tests/hashing_read_buffer.cpp +++ b/dbms/src/IO/tests/hashing_read_buffer.cpp @@ -3,14 +3,18 @@ #include #include "hashing_buffer.h" #include +#include + static void test(size_t data_size) { + pcg64 rng; + std::vector vec(data_size); char * data = vec.data(); for (size_t i = 0; i < data_size; ++i) - data[i] = rand() & 255; + data[i] = rng() & 255; CityHash_v1_0_2::uint128 reference = referenceHash(data, data_size); diff --git a/dbms/src/IO/tests/hashing_write_buffer.cpp b/dbms/src/IO/tests/hashing_write_buffer.cpp index cf7c18d1c77..461c39139c4 100644 --- a/dbms/src/IO/tests/hashing_write_buffer.cpp +++ b/dbms/src/IO/tests/hashing_write_buffer.cpp @@ -1,15 +1,18 @@ #include #include +#include #include "hashing_buffer.h" static void test(size_t data_size) { + pcg64 rng; + std::vector vec(data_size); char * data = vec.data(); for (size_t i = 0; i < data_size; ++i) - data[i] = rand() & 255; + data[i] = rng() & 255; CityHash_v1_0_2::uint128 reference = referenceHash(data, data_size); @@ -20,14 +23,14 @@ static void test(size_t data_size) for (size_t pos = 0; pos < data_size;) { - size_t len = std::min(static_cast(rand() % 10000 + 1), data_size - pos); + size_t len = 
std::min(static_cast(rng() % 10000 + 1), data_size - pos); buf.write(data + pos, len); buf.next(); pos += len; } if (buf.getHash() != reference) - FAIL("failed on data size " << data_size << " writing random chunks of up to 10000 bytes"); + FAIL("failed on data size " << data_size << " writing random chunks of up to 10000 bytes"); } { @@ -35,14 +38,14 @@ static void test(size_t data_size) for (size_t pos = 0; pos < data_size;) { - size_t len = std::min(static_cast(rand() % 5 + 1), data_size - pos); + size_t len = std::min(static_cast(rng() % 5 + 1), data_size - pos); buf.write(data + pos, len); buf.next(); pos += len; } if (buf.getHash() != reference) - FAIL("failed on data size " << data_size << " writing random chunks of up to 5 bytes"); + FAIL("failed on data size " << data_size << " writing random chunks of up to 5 bytes"); } { @@ -50,14 +53,14 @@ static void test(size_t data_size) for (size_t pos = 0; pos < data_size;) { - size_t len = std::min(static_cast(2048 + rand() % 3 - 1), data_size - pos); + size_t len = std::min(static_cast(2048 + rng() % 3 - 1), data_size - pos); buf.write(data + pos, len); buf.next(); pos += len; } if (buf.getHash() != reference) - FAIL("failed on data size " << data_size << " writing random chunks of 2048 +-1 bytes"); + FAIL("failed on data size " << data_size << " writing random chunks of 2048 +-1 bytes"); } { diff --git a/dbms/src/IO/tests/parse_int_perf.cpp b/dbms/src/IO/tests/parse_int_perf.cpp index 11558289d24..8e0185df239 100644 --- a/dbms/src/IO/tests/parse_int_perf.cpp +++ b/dbms/src/IO/tests/parse_int_perf.cpp @@ -40,7 +40,7 @@ int main(int argc, char ** argv) using T = UInt8; - size_t n = atoi(argv[1]); + size_t n = std::stol(argv[1]); std::vector data(n); std::vector data2(n); diff --git a/dbms/src/IO/tests/ryu_test.cpp b/dbms/src/IO/tests/ryu_test.cpp index d8c385f2d0b..0713a01960f 100644 --- a/dbms/src/IO/tests/ryu_test.cpp +++ b/dbms/src/IO/tests/ryu_test.cpp @@ -6,7 +6,7 @@ struct DecomposedFloat64 { - DecomposedFloat64(double x) + explicit DecomposedFloat64(double x) { memcpy(&x_uint, &x, sizeof(x)); } @@ -43,7 +43,7 @@ struct DecomposedFloat64 struct DecomposedFloat32 { - DecomposedFloat32(float x) + explicit DecomposedFloat32(float x) { memcpy(&x_uint, &x, sizeof(x)); } diff --git a/dbms/src/IO/tests/valid_utf8_perf.cpp b/dbms/src/IO/tests/valid_utf8_perf.cpp index b483c333934..ce9edcbb382 100644 --- a/dbms/src/IO/tests/valid_utf8_perf.cpp +++ b/dbms/src/IO/tests/valid_utf8_perf.cpp @@ -12,7 +12,7 @@ int main(int argc, char ** argv) { int repeats = 1; if (argc >= 2) - repeats = atoi(argv[1]); + repeats = std::stol(argv[1]); std::string text((std::istreambuf_iterator(std::cin)), std::istreambuf_iterator()); diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index bfb786dc850..3e07105c2f1 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -496,18 +496,17 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { /// If the argument is not a lambda expression, call it recursively and find out its type. 
visit(child, data); - std::string name = child_column_name; - if (data.hasColumn(name)) + if (data.hasColumn(child_column_name)) { - argument_types.push_back(data.getSampleBlock().getByName(name).type); - argument_names.push_back(name); + argument_types.push_back(data.getSampleBlock().getByName(child_column_name).type); + argument_names.push_back(child_column_name); } else { if (data.only_consts) arguments_present = false; else - throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown identifier: " + child_column_name, ErrorCodes::UNKNOWN_IDENTIFIER); } } } diff --git a/dbms/src/Interpreters/CatBoostModel.cpp b/dbms/src/Interpreters/CatBoostModel.cpp index 3d365ab3927..ca286aa5ab5 100644 --- a/dbms/src/Interpreters/CatBoostModel.cpp +++ b/dbms/src/Interpreters/CatBoostModel.cpp @@ -54,7 +54,7 @@ struct CatBoostWrapperAPI double * result, size_t resultSize); int (* GetStringCatFeatureHash)(const char * data, size_t size); - int (* GetIntegerCatFeatureHash)(long long val); + int (* GetIntegerCatFeatureHash)(uint64_t val); size_t (* GetFloatFeaturesCount)(ModelCalcerHandle* calcer); size_t (* GetCatFeaturesCount)(ModelCalcerHandle* calcer); diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index c05e617f84d..4e249ecb44a 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -31,7 +31,7 @@ namespace struct JoinedElement { - JoinedElement(const ASTTablesInSelectQueryElement & table_element) + explicit JoinedElement(const ASTTablesInSelectQueryElement & table_element) : element(table_element) { if (element.table_join) diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index a10b953e644..4bbd6ea3ee5 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -794,7 +794,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( /// Does nothing if wasn't previously locked lock->unlock(); - std::this_thread::sleep_for(std::chrono::milliseconds(std::uniform_int_distribution(0, 1000)(rng))); + std::this_thread::sleep_for(std::chrono::milliseconds(std::uniform_int_distribution(0, 1000)(rng))); } /// Not executed by leader so was not executed at all diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 656df317554..9b60323a885 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -137,7 +137,7 @@ struct SymbolResolver : public llvm::orc::SymbolResolver { llvm::LegacyJITSymbolResolver & impl; - SymbolResolver(llvm::LegacyJITSymbolResolver & impl_) : impl(impl_) {} + explicit SymbolResolver(llvm::LegacyJITSymbolResolver & impl_) : impl(impl_) {} llvm::orc::SymbolNameSet getResponsibilitySet(const llvm::orc::SymbolNameSet & symbols) final { diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 64c6342cf04..d9f0cfb55cb 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -153,7 +153,7 @@ public: /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. */ - bool addJoinedBlock(const Block & block, bool check_limits = true) override; + bool addJoinedBlock(const Block & block, bool check_limits) override; /** Join data from the map (that was previously built by calls to addJoinedBlock) to the block with data from "left" table. 
* Could be called from different threads in parallel. diff --git a/dbms/src/Interpreters/JoinSwitcher.h b/dbms/src/Interpreters/JoinSwitcher.h index 5e677b5205f..ecf042fb7ac 100644 --- a/dbms/src/Interpreters/JoinSwitcher.h +++ b/dbms/src/Interpreters/JoinSwitcher.h @@ -20,7 +20,7 @@ public: /// Add block of data from right hand of JOIN into current join object. /// If join-in-memory memory limit exceeded switches to join-on-disk and continue with it. /// @returns false, if join-on-disk disk limit exceeded - bool addJoinedBlock(const Block & block, bool check_limits = true) override; + bool addJoinedBlock(const Block & block, bool check_limits) override; void joinBlock(Block & block, std::shared_ptr & not_processed) override { diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 39037613dda..dbe58cfc86a 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -129,7 +129,7 @@ struct ColumnAliasesMatcher std::vector> compound_identifiers; std::set allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only). - Data(const std::vector && tables_) + explicit Data(const std::vector && tables_) : tables(tables_) , public_names(false) {} diff --git a/dbms/src/Interpreters/MergeJoin.h b/dbms/src/Interpreters/MergeJoin.h index 7d934aed06a..74a11fc05e4 100644 --- a/dbms/src/Interpreters/MergeJoin.h +++ b/dbms/src/Interpreters/MergeJoin.h @@ -50,7 +50,7 @@ class MergeJoin : public IJoin public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); - bool addJoinedBlock(const Block & block, bool check_limits = true) override; + bool addJoinedBlock(const Block & block, bool check_limits) override; void joinBlock(Block &, ExtraBlockPtr & not_processed) override; void joinTotals(Block &) const override; void setTotals(const Block &) override; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index cb084ee2cbc..568b08b8f5a 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -26,7 +26,7 @@ namespace ErrorCodes class CheckASTDepth { public: - CheckASTDepth(QueryNormalizer::Data & data_) + explicit CheckASTDepth(QueryNormalizer::Data & data_) : data(data_) { if (data.level > data.settings.max_ast_depth) @@ -47,7 +47,7 @@ private: class RestoreAliasOnExitScope { public: - RestoreAliasOnExitScope(String & alias_) + explicit RestoreAliasOnExitScope(String & alias_) : alias(alias_) , copy(alias_) {} diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 0379d928442..5e35c03a577 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -806,6 +806,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( /// TODO: Remove unneeded conversion std::vector tables_with_column_names; + tables_with_column_names.reserve(tables_with_columns.size()); for (const auto & table : tables_with_columns) tables_with_column_names.emplace_back(table.removeTypes()); diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 27542b97691..a0f411dcc96 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -34,7 +34,7 @@ bool 
TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const auto nested1 = IdentifierSemantic::extractNestedName(identifier, table.table); auto nested2 = IdentifierSemantic::extractNestedName(identifier, table.alias); - String short_name = identifier.shortName(); + const String & short_name = identifier.shortName(); const Names & column_names = tables[table_pos].columns; for (auto & known_name : column_names) { diff --git a/dbms/src/Interpreters/tests/aggregate.cpp b/dbms/src/Interpreters/tests/aggregate.cpp index df498d6039d..9959bca7aac 100644 --- a/dbms/src/Interpreters/tests/aggregate.cpp +++ b/dbms/src/Interpreters/tests/aggregate.cpp @@ -20,7 +20,7 @@ int main(int argc, char ** argv) try { - size_t n = argc == 2 ? atoi(argv[1]) : 10; + size_t n = argc == 2 ? std::stol(argv[1]) : 10; Block block; diff --git a/dbms/src/Interpreters/tests/expression.cpp b/dbms/src/Interpreters/tests/expression.cpp index bf0058e5289..8d64b4f64ce 100644 --- a/dbms/src/Interpreters/tests/expression.cpp +++ b/dbms/src/Interpreters/tests/expression.cpp @@ -63,7 +63,7 @@ int main(int argc, char ** argv) chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); - size_t n = argc == 2 ? atoi(argv[1]) : 10; + size_t n = argc == 2 ? std::stol(argv[1]) : 10; Block block; diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp index 7c52953fa9f..bc35bea16bb 100644 --- a/dbms/src/Interpreters/tests/hash_map.cpp +++ b/dbms/src/Interpreters/tests/hash_map.cpp @@ -92,8 +92,8 @@ int main(int argc, char ** argv) using Value = std::vector; #endif - size_t n = argc < 2 ? 10000000 : atoi(argv[1]); - //size_t m = atoi(argv[2]); + size_t n = argc < 2 ? 10000000 : std::stol(argv[1]); + //size_t m = std::stol(argv[2]); AggregateFunctionFactory factory; DataTypes data_types_empty; @@ -149,7 +149,7 @@ int main(int argc, char ** argv) << std::endl; } - if (argc < 3 || atoi(argv[2]) == 1) + if (argc < 3 || std::stol(argv[2]) == 1) { Stopwatch watch; @@ -179,7 +179,7 @@ int main(int argc, char ** argv) << std::endl; } - if (argc < 3 || atoi(argv[2]) == 2) + if (argc < 3 || std::stol(argv[2]) == 2) { Stopwatch watch; @@ -211,7 +211,7 @@ int main(int argc, char ** argv) } #if defined(__x86_64__) - if (argc < 3 || atoi(argv[2]) == 3) + if (argc < 3 || std::stol(argv[2]) == 3) { Stopwatch watch; @@ -243,7 +243,7 @@ int main(int argc, char ** argv) } #endif - if (argc < 3 || atoi(argv[2]) == 4) + if (argc < 3 || std::stol(argv[2]) == 4) { Stopwatch watch; @@ -263,7 +263,7 @@ int main(int argc, char ** argv) << std::endl; } - if (argc < 3 || atoi(argv[2]) == 5) + if (argc < 3 || std::stol(argv[2]) == 5) { Stopwatch watch; @@ -284,7 +284,7 @@ int main(int argc, char ** argv) << std::endl; } - if (argc < 3 || atoi(argv[2]) == 6) + if (argc < 3 || std::stol(argv[2]) == 6) { Stopwatch watch; diff --git a/dbms/src/Interpreters/tests/hash_map_lookup.cpp b/dbms/src/Interpreters/tests/hash_map_lookup.cpp index 13ff3234e8b..926e6b6766e 100644 --- a/dbms/src/Interpreters/tests/hash_map_lookup.cpp +++ b/dbms/src/Interpreters/tests/hash_map_lookup.cpp @@ -97,8 +97,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); - size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); + size_t m = std::stol(argv[2]); std::vector data(n); diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp index f6103556986..26f69f95e5e 100644 --- a/dbms/src/Interpreters/tests/hash_map_string.cpp +++ 
b/dbms/src/Interpreters/tests/hash_map_string.cpp @@ -292,8 +292,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); - size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); + size_t m = std::stol(argv[2]); DB::Arena pool; std::vector data(n); diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp index 8e13ee46e6d..5f6954cb0b0 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp @@ -614,8 +614,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); - size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); + size_t m = std::stol(argv[2]); DB::Arena pool; std::vector data(n); diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index cc21129a6a6..3dfbe5fb0f2 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -466,8 +466,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); - size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); + size_t m = std::stol(argv[2]); DB::Arena pool; std::vector data(n); diff --git a/dbms/src/Interpreters/tests/hash_map_string_small.cpp b/dbms/src/Interpreters/tests/hash_map_string_small.cpp index 18ee1eb0531..7dac9691dc9 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_small.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_small.cpp @@ -102,8 +102,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); - size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); + size_t m = std::stol(argv[2]); DB::Arena pool; std::vector data(n); diff --git a/dbms/src/Interpreters/tests/string_hash_map.cpp b/dbms/src/Interpreters/tests/string_hash_map.cpp index 37fbefbe987..3969458fced 100644 --- a/dbms/src/Interpreters/tests/string_hash_map.cpp +++ b/dbms/src/Interpreters/tests/string_hash_map.cpp @@ -211,8 +211,8 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); - size_t m = atoi(argv[2]); + size_t n = std::stol(argv[1]); + size_t m = std::stol(argv[2]); DB::Arena pool(128 * 1024 * 1024); std::vector data(n); diff --git a/dbms/src/Interpreters/tests/two_level_hash_map.cpp b/dbms/src/Interpreters/tests/two_level_hash_map.cpp index e1370cd3932..604f0de2976 100644 --- a/dbms/src/Interpreters/tests/two_level_hash_map.cpp +++ b/dbms/src/Interpreters/tests/two_level_hash_map.cpp @@ -31,7 +31,7 @@ int main(int argc, char ** argv) return 1; } - size_t n = atoi(argv[1]); + size_t n = std::stol(argv[1]); std::vector data(n); diff --git a/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index a6eed90d57a..52ceaf063b7 100644 --- a/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -75,7 +75,7 @@ namespace ErrorCodes class InputStreamReadBufferAdapter : public avro::InputStream { public: - InputStreamReadBufferAdapter(ReadBuffer & in_) : in(in_) {} + explicit InputStreamReadBufferAdapter(ReadBuffer & in_) : in(in_) {} bool next(const uint8_t ** data, size_t * len) override { @@ -444,7 +444,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) AvroDeserializer::AvroDeserializer(const ColumnsWithTypeAndName & columns, avro::ValidSchema schema) { - auto schema_root = schema.root(); + const auto & schema_root = schema.root(); if 
(schema_root->type() != avro::AVRO_RECORD) { throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); @@ -519,7 +519,7 @@ bool AvroRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) class AvroConfluentRowInputFormat::SchemaRegistry { public: - SchemaRegistry(const std::string & base_url_, size_t schema_cache_max_size = 1000) + explicit SchemaRegistry(const std::string & base_url_, size_t schema_cache_max_size = 1000) : base_url(base_url_), schema_cache(schema_cache_max_size) { if (base_url.empty()) diff --git a/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 65fac6d87f7..97106d5e297 100644 --- a/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -52,7 +52,7 @@ namespace ErrorCodes class OutputStreamWriteBufferAdapter : public avro::OutputStream { public: - OutputStreamWriteBufferAdapter(WriteBuffer & out_) : out(out_) {} + explicit OutputStreamWriteBufferAdapter(WriteBuffer & out_) : out(out_) {} virtual bool next(uint8_t ** data, size_t * len) override { diff --git a/dbms/src/Processors/tests/processors_test.cpp b/dbms/src/Processors/tests/processors_test.cpp index 3e2e6abd1da..b0270932234 100644 --- a/dbms/src/Processors/tests/processors_test.cpp +++ b/dbms/src/Processors/tests/processors_test.cpp @@ -122,7 +122,7 @@ class PrintSink : public ISink public: String getName() const override { return "Print"; } - PrintSink(String prefix_) + explicit PrintSink(String prefix_) : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), prefix(std::move(prefix_)) { diff --git a/dbms/src/Processors/tests/processors_test_chain.cpp b/dbms/src/Processors/tests/processors_test_chain.cpp index b6a4f0ad653..d6d10c56285 100644 --- a/dbms/src/Processors/tests/processors_test_chain.cpp +++ b/dbms/src/Processors/tests/processors_test_chain.cpp @@ -76,7 +76,7 @@ class PrintSink : public ISink public: String getName() const override { return "Print"; } - PrintSink(String prefix_) + explicit PrintSink(String prefix_) : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), prefix(std::move(prefix_)) { diff --git a/dbms/src/Processors/tests/processors_test_expand_pipeline.cpp b/dbms/src/Processors/tests/processors_test_expand_pipeline.cpp index 1d03d75c55d..78fa0bee7dd 100644 --- a/dbms/src/Processors/tests/processors_test_expand_pipeline.cpp +++ b/dbms/src/Processors/tests/processors_test_expand_pipeline.cpp @@ -26,7 +26,7 @@ class PrintSink : public ISink public: String getName() const override { return "Print"; } - PrintSink(String prefix_) + explicit PrintSink(String prefix_) : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), prefix(std::move(prefix_)) { @@ -64,7 +64,7 @@ class OneNumberSource : public ISource public: String getName() const override { return "OneNumber"; } - OneNumberSource(UInt64 number_) + explicit OneNumberSource(UInt64 number_) : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), number(number_) { diff --git a/dbms/src/Processors/tests/processors_test_merge.cpp b/dbms/src/Processors/tests/processors_test_merge.cpp index 3842286bc59..226b08ece09 100644 --- a/dbms/src/Processors/tests/processors_test_merge.cpp +++ b/dbms/src/Processors/tests/processors_test_merge.cpp @@ -211,7 +211,7 @@ class PrintSink : public ISink public: String getName() 
const override { return "Print"; } - PrintSink(String prefix_) + explicit PrintSink(String prefix_) : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), prefix(std::move(prefix_)) { diff --git a/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp b/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp index af27973e3fd..f148d46bd19 100644 --- a/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp +++ b/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp @@ -83,7 +83,7 @@ class PrintSink : public ISink public: String getName() const override { return "Print"; } - PrintSink(String prefix_) + explicit PrintSink(String prefix_) : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), prefix(std::move(prefix_)) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f7e9cb80103..adb106205de 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2034,8 +2034,7 @@ void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const { for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) { - String name = it.name(); - if (name == old_name) + if (it.name() == old_name) { old_part_name_to_full_path[old_name] = full_path; break; @@ -3103,12 +3102,10 @@ MergeTreeData::getDetachedParts() const for (Poco::DirectoryIterator it(path + "detached"); it != Poco::DirectoryIterator(); ++it) { - auto dir_name = it.name(); - res.emplace_back(); auto & part = res.back(); - DetachedPartInfo::tryParseDetachedPartName(dir_name, part, format_version); + DetachedPartInfo::tryParseDetachedPartName(it.name(), part, format_version); part.disk = disk->getName(); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ac073e76217..cffc654ed55 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -494,7 +494,7 @@ public: /// Vertical merge) or a mutation of a single part. During a single stage all rows are read. 
struct MergeStageProgress { - MergeStageProgress(Float64 weight_) + explicit MergeStageProgress(Float64 weight_) : is_first(true) , weight(weight_) { } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartCompact.h index d75e5befe36..a8d31795df6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -42,15 +42,15 @@ public: UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const MergeTreeReaderSettings & reader_settings_, - const ValueSizeMap & avg_value_size_hints = ValueSizeMap{}, - const ReadBufferFromFileBase::ProfileCallback & profile_callback = ReadBufferFromFileBase::ProfileCallback{}) const override; + const ValueSizeMap & avg_value_size_hints, + const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; MergeTreeWriterPtr getWriter( const NamesAndTypesList & columns_list, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity = {}) const override; + const MergeTreeIndexGranularity & computed_index_granularity) const override; bool isStoredOnDisk() const override { return true; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartWide.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartWide.h index c0c7e45b7ef..bba70aa8c5f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -34,15 +34,15 @@ public: UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const MergeTreeReaderSettings & reader_settings_, - const ValueSizeMap & avg_value_size_hints = ValueSizeMap{}, - const ReadBufferFromFileBase::ProfileCallback & profile_callback = ReadBufferFromFileBase::ProfileCallback{}) const override; + const ValueSizeMap & avg_value_size_hints, + const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; MergeTreeWriterPtr getWriter( const NamesAndTypesList & columns_list, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity = {}) const override; + const MergeTreeIndexGranularity & computed_index_granularity) const override; bool isStoredOnDisk() const override { return true; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 3b405bbf1c5..598a4dd47fb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -18,10 +18,10 @@ public: const MergeTreeWriterSettings & settings, const MergeTreeIndexGranularity & index_granularity); - void write(const Block & block, const IColumn::Permutation * permutation = nullptr, - const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) override; + void write(const Block & block, const IColumn::Permutation * permutation, + const Block & primary_key_block, const Block & skip_indexes_block) override; - void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync = false) override; + void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) override; private: /// Write single granule of one column (rows between 2 marks) diff --git 
a/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index d6b01f9c45c..95e43cd31af 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,10 +21,10 @@ public: const MergeTreeWriterSettings & settings, const MergeTreeIndexGranularity & index_granularity); - void write(const Block & block, const IColumn::Permutation * permutation = nullptr, - const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) override; + void write(const Block & block, const IColumn::Permutation * permutation, + const Block & primary_key_block, const Block & skip_indexes_block) override; - void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync = false) override; + void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) override; IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 5e3999ffbec..160b15e0f6c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -33,7 +33,7 @@ class LargestPartsWithRequiredSize UInt64 current_size_sum = 0; public: - LargestPartsWithRequiredSize(UInt64 required_sum_size_) : required_size_sum(required_sum_size_) {} + explicit LargestPartsWithRequiredSize(UInt64 required_sum_size_) : required_size_sum(required_sum_size_) {} void add(MergeTreeData::DataPartPtr part) { diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 244bca37399..4ef8f39d1f7 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -642,7 +642,7 @@ ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const Select if (!block.has(sharding_key_column_name)) throw Exception("sharding_key_expr should evaluate as a single row", ErrorCodes::TOO_MANY_ROWS); - const auto result = block.getByName(sharding_key_column_name); + const ColumnWithTypeAndName & result = block.getByName(sharding_key_column_name); const auto selector = createSelector(cluster, result); shards.insert(selector.begin(), selector.end()); diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 4bec5a05205..f47cc9ae886 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -94,7 +94,7 @@ HashJoinPtr StorageJoin::getJoin(std::shared_ptr analyzed_join) co } -void StorageJoin::insertBlock(const Block & block) { join->addJoinedBlock(block); } +void StorageJoin::insertBlock(const Block & block) { join->addJoinedBlock(block, true); } size_t StorageJoin::getSize() const { return join->getTotalRowCount(); } diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 802c3a66aa5..aff9d8c7dce 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -155,6 +155,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C std::vector shards = parseRemoteDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); std::vector> names; + names.reserve(shards.size()); for (const auto & shard : shards) names.push_back(parseRemoteDescription(shard, 0, shard.size(), '|', 
max_addresses)); From 4514f89fb37a592dc7a00a48b8122434b8d80a41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 06:29:25 +0300 Subject: [PATCH 070/115] Fixed style --- .../AggregateFunctionSimpleLinearRegression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 46c9402c36e..44631d5832a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -56,7 +56,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression( FOR_LEASTSQR_TYPES_2(M, Float64) #define DISPATCH(T1, T2) \ if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \ - return std::make_shared<AggregateFunctionSimpleLinearRegression<T1, T2>>( /* NOLINT */ \ + return std::make_shared<AggregateFunctionSimpleLinearRegression<T1, T2>>(/* NOLINT */ \ arguments, \ params \ ); From 1ced2550d0925c092c43ea0e30a52a83e5d5f263 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 16:11:38 +0300 Subject: [PATCH 071/115] Fixed clang-tidy check --- dbms/src/Common/SymbolIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/SymbolIndex.cpp b/dbms/src/Common/SymbolIndex.cpp index a5c0835fe9c..14c4d627d61 100644 --- a/dbms/src/Common/SymbolIndex.cpp +++ b/dbms/src/Common/SymbolIndex.cpp @@ -168,7 +168,7 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr)); /* Iterate over the symbol table */ - for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; ++sym_index) + for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { /// We are not interested in empty symbols. if (!elf_sym[sym_index].st_size) From 5e72202b5120ca055c86340a9418d51aeee3fbc6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 19:50:08 +0300 Subject: [PATCH 072/115] Fixed clang-tidy check --- dbms/src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index a1c86953dc9..c4339548b19 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -118,7 +118,7 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi if (s >= 256 && s <= std::numeric_limits<UInt32>::max()) { PaddedPODArray<ValueWithIndex<T>> pairs(s); - for (UInt32 i = 0; i < s; ++i) + for (UInt32 i = 0; i < UInt32(s); ++i) pairs[i] = {data[i], i}; RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s); From f0ee6055dcc940953b100018e461da834f398b24 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 18 Mar 2020 21:10:02 +0300 Subject: [PATCH 073/115] Add Slack link to README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 919df5a0049..a6f2c29d628 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ ClickHouse is an open-source column-oriented database management system that all * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster. * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
+* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/enQtOTUzMjM4ODQwNTc5LWJmMjE3Yjc2YmI1ZDBlZmI4ZTc3OWY3ZTIwYTljYzY4MzBlODM3YzBjZTc1YmYyODRlZTJkYTgzYzBiNTA2Yjk) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person. From 5983cf03a6d6d70867edaea54e05318a0e424962 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 21:27:33 +0300 Subject: [PATCH 074/115] Removed always built target --- contrib/avro-cmake/CMakeLists.txt | 10 ++-------- contrib/avro-cmake/include/avro | 1 + 2 files changed, 3 insertions(+), 8 deletions(-) create mode 120000 contrib/avro-cmake/include/avro diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index f544b3c50cd..a4154a331b7 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ set(AVROCPP_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++) -set(AVROCPP_INCLUDE_DIR ${AVROCPP_ROOT_DIR}/api) +set(AVROCPP_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/contrib/avro-cmake/include) set(AVROCPP_SOURCE_DIR ${AVROCPP_ROOT_DIR}/impl) set (CMAKE_CXX_STANDARD 17) @@ -44,6 +44,7 @@ add_library (avrocpp ${AVROCPP_SOURCE_FILES}) set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}) target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) +target_include_directories(avrocpp SYSTEM PRIVATE ${AVROCPP_ROOT_DIR}/api) target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY}) @@ -61,10 +62,3 @@ elseif (COMPILER_CLANG) endif () target_compile_options(avrocpp PRIVATE ${SUPPRESS_WARNINGS}) - -# create a symlink to include headers with -ADD_CUSTOM_TARGET(avro_symlink_headers ALL - COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include - COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro -) -add_dependencies(avrocpp avro_symlink_headers) \ No newline at end of file diff --git a/contrib/avro-cmake/include/avro b/contrib/avro-cmake/include/avro new file mode 120000 index 00000000000..4d02fd92e3f --- /dev/null +++ b/contrib/avro-cmake/include/avro @@ -0,0 +1 @@ +../../avro/lang/c++/api \ No newline at end of file From 40649ed0bbe171b03890e0a8b5ae797056538cf2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 18 Mar 2020 21:43:51 +0300 Subject: [PATCH 075/115] remove extra space (#9736) --- docs/en/data_types/array.md | 2 +- docs/en/data_types/datetime.md | 2 +- docs/en/data_types/datetime64.md | 2 +- docs/en/data_types/float.md | 2 +- docs/en/data_types/index.md | 2 +- .../aggregatefunction.md | 2 +- docs/en/data_types/nullable.md | 2 +- .../data_types/special_data_types/interval.md | 4 +- docs/en/data_types/special_data_types/set.md | 3 +- docs/en/data_types/uuid.md | 2 +- docs/en/faq/general.md | 4 +- docs/en/getting_started/install.md | 4 +- docs/en/guides/apply_catboost_model.md | 12 +- docs/en/interfaces/cli.md | 8 +- docs/en/interfaces/formats.md | 70 ++++---- docs/en/interfaces/http.md | 4 +- docs/en/interfaces/index.md | 2 +- docs/en/interfaces/mysql.md | 2 +- 
docs/en/interfaces/third-party/gui.md | 2 +- docs/en/operations/configuration_files.md | 2 +- docs/en/operations/quotas.md | 2 +- docs/en/operations/server_settings/index.md | 2 +- .../en/operations/server_settings/settings.md | 64 ++++---- docs/en/operations/settings/index.md | 2 +- .../settings/permissions_for_queries.md | 6 +- .../operations/settings/query_complexity.md | 14 +- docs/en/operations/settings/settings.md | 154 +++++++++--------- docs/en/operations/system_tables.md | 34 ++-- .../table_engines/collapsingmergetree.md | 6 +- docs/en/operations/table_engines/file.md | 2 +- docs/en/operations/table_engines/generate.md | 2 +- .../table_engines/graphitemergetree.md | 12 +- docs/en/operations/table_engines/hdfs.md | 2 +- docs/en/operations/table_engines/index.md | 4 +- docs/en/operations/table_engines/jdbc.md | 2 +- docs/en/operations/table_engines/join.md | 4 +- docs/en/operations/table_engines/kafka.md | 2 +- docs/en/operations/table_engines/mergetree.md | 22 +-- docs/en/operations/table_engines/odbc.md | 2 +- .../operations/table_engines/replication.md | 4 +- docs/en/operations/table_engines/stripelog.md | 8 +- .../table_engines/summingmergetree.md | 2 +- docs/en/operations/table_engines/url.md | 2 +- .../versionedcollapsingmergetree.md | 4 +- docs/en/operations/table_engines/view.md | 2 +- docs/en/operations/troubleshooting.md | 8 +- .../operations/utils/clickhouse-benchmark.md | 8 +- .../agg_functions/combinators.md | 20 +-- docs/en/query_language/agg_functions/index.md | 2 +- .../agg_functions/parametric_functions.md | 10 +- .../query_language/agg_functions/reference.md | 72 ++++---- docs/en/query_language/alter.md | 42 ++--- docs/en/query_language/create.md | 16 +- .../en/query_language/dicts/external_dicts.md | 4 +- .../dicts/external_dicts_dict.md | 2 +- .../dicts/external_dicts_dict_layout.md | 6 +- .../dicts/external_dicts_dict_sources.md | 18 +- .../dicts/external_dicts_dict_structure.md | 6 +- .../en/query_language/dicts/internal_dicts.md | 2 +- .../functions/arithmetic_functions.md | 2 +- .../functions/array_functions.md | 34 ++-- .../en/query_language/functions/array_join.md | 2 +- .../query_language/functions/bit_functions.md | 6 +- .../functions/bitmap_functions.md | 8 +- .../functions/comparison_functions.md | 12 +- .../functions/conditional_functions.md | 4 +- .../functions/date_time_functions.md | 6 +- .../functions/encoding_functions.md | 4 +- .../functions/ext_dict_functions.md | 6 +- docs/en/query_language/functions/geo.md | 2 +- .../functions/hash_functions.md | 16 +- .../functions/higher_order_functions.md | 20 +-- .../query_language/functions/in_functions.md | 2 +- .../query_language/functions/introspection.md | 6 +- .../functions/machine_learning_functions.md | 2 +- .../functions/other_functions.md | 36 ++-- .../functions/rounding_functions.md | 4 +- .../functions/string_functions.md | 20 +-- .../functions/string_search_functions.md | 20 +-- .../functions/type_conversion_functions.md | 10 +- .../functions/uuid_functions.md | 2 +- docs/en/query_language/insert_into.md | 2 +- docs/en/query_language/misc.md | 10 +- docs/en/query_language/operators.md | 10 +- docs/en/query_language/select.md | 40 ++--- docs/en/query_language/show.md | 2 +- docs/en/query_language/syntax.md | 12 +- docs/en/query_language/system.md | 30 ++-- .../en/query_language/table_functions/jdbc.md | 2 +- .../en/query_language/table_functions/odbc.md | 2 +- website/images/clickhouse-black.svg | 2 +- 91 files changed, 522 insertions(+), 523 deletions(-) diff --git 
a/docs/en/data_types/array.md b/docs/en/data_types/array.md index 4c9eef2cdfe..d23fe60c327 100644 --- a/docs/en/data_types/array.md +++ b/docs/en/data_types/array.md @@ -1,4 +1,4 @@ -# Array(T) { #data_type-array} +# Array(T) {#data_type-array} Array of `T`-type items. diff --git a/docs/en/data_types/datetime.md b/docs/en/data_types/datetime.md index 947b481d166..fded3caa4e5 100644 --- a/docs/en/data_types/datetime.md +++ b/docs/en/data_types/datetime.md @@ -1,4 +1,4 @@ -# DateTime { #data_type-datetime} +# DateTime {#data_type-datetime} Allows to store an instant in time, that can be expressed as a calendar date and a time of a day. diff --git a/docs/en/data_types/datetime64.md b/docs/en/data_types/datetime64.md index f060ba9d83c..90b59b0fc97 100644 --- a/docs/en/data_types/datetime64.md +++ b/docs/en/data_types/datetime64.md @@ -1,4 +1,4 @@ -# DateTime64 { #data_type-datetime64} +# DateTime64 {#data_type-datetime64} Allows to store an instant in time, that can be expressed as a calendar date and a time of a day, with defined sub-second precision diff --git a/docs/en/data_types/float.md b/docs/en/data_types/float.md index c184bf6bfe8..b9f2525e36d 100644 --- a/docs/en/data_types/float.md +++ b/docs/en/data_types/float.md @@ -27,7 +27,7 @@ SELECT 1 - 0.9 - Floating-point calculations might result in numbers such as infinity (`Inf`) and "not-a-number" (`NaN`). This should be taken into account when processing the results of calculations. - When parsing floating-point numbers from text, the result might not be the nearest machine-representable number. -## NaN and Inf { #data_type-float-nan-inf} +## NaN and Inf {#data_type-float-nan-inf} In contrast to standard SQL, ClickHouse supports the following categories of floating-point numbers: diff --git a/docs/en/data_types/index.md b/docs/en/data_types/index.md index 4f0a57959ab..095af244b55 100644 --- a/docs/en/data_types/index.md +++ b/docs/en/data_types/index.md @@ -1,4 +1,4 @@ -# Data Types { #data_types} +# Data Types {#data_types} ClickHouse can store various kinds of data in table cells. diff --git a/docs/en/data_types/nested_data_structures/aggregatefunction.md b/docs/en/data_types/nested_data_structures/aggregatefunction.md index 36b18167164..6304327fab8 100644 --- a/docs/en/data_types/nested_data_structures/aggregatefunction.md +++ b/docs/en/data_types/nested_data_structures/aggregatefunction.md @@ -1,4 +1,4 @@ -# AggregateFunction(name, types_of_arguments...) { #data_type-aggregatefunction} +# AggregateFunction(name, types_of_arguments...) {#data_type-aggregatefunction} The intermediate state of an aggregate function. To get it, use aggregate functions with the `-State` suffix. To get aggregated data in the future, you must use the same aggregate functions with the `-Merge`suffix. diff --git a/docs/en/data_types/nullable.md b/docs/en/data_types/nullable.md index a94967e92eb..e328a3ca024 100644 --- a/docs/en/data_types/nullable.md +++ b/docs/en/data_types/nullable.md @@ -1,4 +1,4 @@ -# Nullable(TypeName) { #data_type-nullable} +# Nullable(TypeName) {#data_type-nullable} Allows to store special marker ([NULL](../query_language/syntax.md)) that denotes "missing value" alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that don't have a value will store `NULL`. 
diff --git a/docs/en/data_types/special_data_types/interval.md b/docs/en/data_types/special_data_types/interval.md index 805a5e78b85..436a7196e49 100644 --- a/docs/en/data_types/special_data_types/interval.md +++ b/docs/en/data_types/special_data_types/interval.md @@ -1,4 +1,4 @@ -# Interval { #data-type-interval} +# Interval {#data-type-interval} The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../query_language/operators.md#operator-interval) operator. @@ -32,7 +32,7 @@ SELECT toTypeName(INTERVAL 4 DAY) └──────────────────────────────┘ ``` -## Usage Remarks { #data-type-interval-usage-remarks} +## Usage Remarks {#data-type-interval-usage-remarks} You can use `Interval`-type values in arithmetical operations with [Date](../../data_types/date.md) and [DateTime](../../data_types/datetime.md)-type values. For example, you can add 4 days to the current time: diff --git a/docs/en/data_types/special_data_types/set.md b/docs/en/data_types/special_data_types/set.md index 2311e55df8f..f8679f9e1b8 100644 --- a/docs/en/data_types/special_data_types/set.md +++ b/docs/en/data_types/special_data_types/set.md @@ -1,6 +1,5 @@ # Set -Used for the right half of an [IN](../../query_language/select.md##select-in-operators) expression. - +Used for the right half of an [IN](../../query_language/select.md#select-in-operators) expression. [Original article](https://clickhouse.tech/docs/en/data_types/special_data_types/set/) diff --git a/docs/en/data_types/uuid.md b/docs/en/data_types/uuid.md index 8aea8b51ace..c5ace976ef9 100644 --- a/docs/en/data_types/uuid.md +++ b/docs/en/data_types/uuid.md @@ -1,4 +1,4 @@ -# UUID { #uuid-data-type} +# UUID {#uuid-data-type} A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). diff --git a/docs/en/faq/general.md b/docs/en/faq/general.md index 01735c35041..fb753026812 100644 --- a/docs/en/faq/general.md +++ b/docs/en/faq/general.md @@ -11,7 +11,7 @@ Distributed sorting is one of the main causes of reduced performance when runnin Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP in order to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. -## What If I Have a Problem with Encodings When Using Oracle Through ODBC? { #oracle-odbc-encodings} +## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings} If you use Oracle through the ODBC driver as a source of external dictionaries, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). @@ -21,7 +21,7 @@ If you use Oracle through the ODBC driver as a source of external dictionaries, NLS_LANG=RUSSIAN_RUSSIA.UTF8 ``` -## How Do I Export Data from ClickHouse to a File? { #how-to-export-to-file} +## How Do I Export Data from ClickHouse to a File? 
{#how-to-export-to-file} ### Using INTO OUTFILE Clause diff --git a/docs/en/getting_started/install.md b/docs/en/getting_started/install.md index e62528e14c2..9bcff1cbeab 100644 --- a/docs/en/getting_started/install.md +++ b/docs/en/getting_started/install.md @@ -14,7 +14,7 @@ To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or P ## Available Installation Options -### From DEB Packages { #install-from-deb-packages} +### From DEB Packages {#install-from-deb-packages} It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu. @@ -66,7 +66,7 @@ sudo yum install clickhouse-server clickhouse-client You can also download and install packages manually from here: . -### From tgz archives { #from-tgz-archives} +### From tgz archives {#from-tgz-archives} It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. diff --git a/docs/en/guides/apply_catboost_model.md b/docs/en/guides/apply_catboost_model.md index 9ab314e0398..a9d8707f5ca 100644 --- a/docs/en/guides/apply_catboost_model.md +++ b/docs/en/guides/apply_catboost_model.md @@ -1,4 +1,4 @@ -# Applying a Catboost Model in ClickHouse { #applying-catboost-model-in-clickhouse} +# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} [CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning. @@ -13,7 +13,7 @@ To apply a CatBoost model in ClickHouse: For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training). -## Prerequisites { #prerequisites} +## Prerequisites {#prerequisites} If you don't have the [Docker](https://docs.docker.com/install/) yet, install it. @@ -44,7 +44,7 @@ yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 $ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse ``` -## 1. Create a Table { #create-table} +## 1. Create a Table {#create-table} To create a ClickHouse table for the train sample: @@ -83,7 +83,7 @@ ENGINE = MergeTree ORDER BY date :) exit ``` -## 2. Insert the Data to the Table { #insert-data-to-table} +## 2. Insert the Data to the Table {#insert-data-to-table} To insert the data: @@ -112,7 +112,7 @@ FROM amazon_train +---------+ ``` -## 3. Integrate CatBoost into ClickHouse { #integrate-catboost-into-clickhouse} +## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse} !!! note "Note" **Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse. @@ -154,7 +154,7 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel./home/catboost/models/*_model.xml ``` -## 4. Run the Model Inference from SQL { #run-model-inference} +## 4. Run the Model Inference from SQL {#run-model-inference} For test model run the ClickHouse client `$ clickhouse client`. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 7477e81cd76..48965d11062 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -19,7 +19,7 @@ Different client and server versions are compatible with one another, but some f ClickHouse client version is older than ClickHouse server. It may lack support for new features. ``` -## Usage { #cli_usage} +## Usage {#cli_usage} The client can be used in interactive and non-interactive (batch) mode. 
To use batch mode, specify the 'query' parameter, or send data to 'stdin' (it verifies that 'stdin' is not a terminal), or both. @@ -71,7 +71,7 @@ You can cancel a long query by pressing Ctrl+C. However, you will still need to The command-line client allows passing external data (external temporary tables) for querying. For more information, see the section "External data for query processing". -### Queries with Parameters { #cli-queries-with-parameters} +### Queries with Parameters {#cli-queries-with-parameters} You can create a query with parameters and pass values to them from client application. This allows to avoid formatting query with specific dynamic values on client side. For example: @@ -79,7 +79,7 @@ You can create a query with parameters and pass values to them from client appli $ clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {parName:Array(UInt16)}" ``` -#### Query Syntax { #cli-queries-with-parameters-syntax} +#### Query Syntax {#cli-queries-with-parameters-syntax} Format a query as usual, then place the values that you want to pass from the app parameters to the query in braces in the following format: @@ -96,7 +96,7 @@ Format a query as usual, then place the values that you want to pass from the ap $ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM table WHERE val = {tuple_in_tuple:Tuple(UInt8, Tuple(String, UInt8))}" ``` -## Configuring { #interfaces_cli_configuration} +## Configuring {#interfaces_cli_configuration} You can pass parameters to `clickhouse-client` (all parameters have a default value) using: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b6e768513d7..a6deb4ccb02 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1,4 +1,4 @@ -# Formats for Input and Output Data { #formats} +# Formats for Input and Output Data {#formats} ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read an external dictionary. A format supported for output can be used to arrange the results of a `SELECT`, and to perform `INSERT`s into a file-backed table. @@ -42,7 +42,7 @@ The supported formats are: You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section. -## TabSeparated { #tabseparated} +## TabSeparated {#tabseparated} In TabSeparated format, data is written by row. Each row contains values separated by tabs. Each value is follow by a tab, except the last value in the row, which is followed by a line feed. Strictly Unix line feeds are assumed everywhere. The last row also must contain a line feed at the end. Values are written in text format, without enclosing quotation marks, and with special characters escaped. @@ -130,14 +130,14 @@ SELECT * FROM nestedt FORMAT TSV 1 [1] ['a'] ``` -## TabSeparatedRaw { #tabseparatedraw} +## TabSeparatedRaw {#tabseparatedraw} Differs from `TabSeparated` format in that the rows are written without escaping. This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). This format is also available under the name `TSVRaw`. 
-## TabSeparatedWithNames { #tabseparatedwithnames} +## TabSeparatedWithNames {#tabseparatedwithnames} Differs from the `TabSeparated` format in that the column names are written in the first row. During parsing, the first row is completely ignored. You can't use column names to determine their position or to check their correctness. @@ -145,14 +145,14 @@ During parsing, the first row is completely ignored. You can't use column names This format is also available under the name `TSVWithNames`. -## TabSeparatedWithNamesAndTypes { #tabseparatedwithnamesandtypes} +## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes} Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. During parsing, the first and second rows are completely ignored. This format is also available under the name `TSVWithNamesAndTypes`. -## Template { #format-template} +## Template {#format-template} This format allows to specify a custom format string with placeholders for values with specified escaping rule. @@ -268,7 +268,7 @@ Page views: ${PageViews:CSV}, User id: ${UserID:CSV}, Useless field: ${:CSV}, Du `PageViews`, `UserID`, `Duration` and `Sign` inside placeholders are names of columns in the table. Values after `Useless field` in rows and after `\nTotal rows: ` in suffix will be ignored. All delimiters in the input data must be strictly equal to delimiters in specified format strings. -## TemplateIgnoreSpaces { #templateignorespaces} +## TemplateIgnoreSpaces {#templateignorespaces} This format is suitable only for input. Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows to specify empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters. @@ -286,7 +286,7 @@ format_template_resultset = '/some/path/resultset.format', format_template_row = {${}"SearchPhrase"${}:${}${phrase:JSON}${},${}"c"${}:${}${cnt:JSON}${}} ``` -## TSKV { #tskv} +## TSKV {#tskv} Similar to TabSeparated, but outputs a value in name=value format. Names are escaped the same way as in TabSeparated format, and the = symbol is also escaped. @@ -319,7 +319,7 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -## CSV { #csv} +## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). @@ -345,12 +345,12 @@ The CSV format supports the output of totals and extremes the same way as `TabSe Also prints the header row, similar to `TabSeparatedWithNames`. -## CustomSeparated { #format-customseparated} +## CustomSeparated {#format-customseparated} Similar to [Template](#format-template), but it prints or reads all columns and uses escaping rule from setting `format_custom_escaping_rule` and delimiters from settings `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` and `format_custom_result_after_delimiter`, not from format strings. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to `TemplateIgnoreSpaces`. 
-## JSON { #json} +## JSON {#json} Outputs data in JSON format. Besides data tables, it also outputs column names and types, along with some additional information: the total number of output rows, and the number of rows that could have been output if there weren't a LIMIT. Example: @@ -439,7 +439,7 @@ ClickHouse supports [NULL](../query_language/syntax.md), which is displayed as ` See also the [JSONEachRow](#jsoneachrow) format. -## JSONCompact { #jsoncompact} +## JSONCompact {#jsoncompact} Differs from JSON only in that data rows are output in arrays, not in objects. @@ -485,7 +485,7 @@ Example: This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). See also the `JSONEachRow` format. -## JSONEachRow { #jsoneachrow} +## JSONEachRow {#jsoneachrow} When using this format, ClickHouse outputs rows as separated, newline-delimited JSON objects, but the data as a whole is not valid JSON. @@ -555,7 +555,7 @@ Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 seque !!! note "Note" Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. -### Usage of Nested Structures { #jsoneachrow-nested} +### Usage of Nested Structures {#jsoneachrow-nested} If you have a table with [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting. @@ -609,18 +609,18 @@ SELECT * FROM json_each_row_nested └───────────────┴────────┘ ``` -## Native { #native} +## Native {#native} The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is "columnar" – it doesn't convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients. You can use this format to quickly generate dumps that can only be read by the ClickHouse DBMS. It doesn't make sense to work with this format yourself. -## Null { #null} +## Null {#null} Nothing is output. However, the query is processed, and when using the command-line client, data is transmitted to the client. This is used for tests, including productivity testing. Obviously, this format is only appropriate for output, not for parsing. -## Pretty { #pretty} +## Pretty {#pretty} Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting colors in the terminal. A full grid of the table is drawn, and each row occupies two lines in the terminal. @@ -684,16 +684,16 @@ Extremes: └────────────┴─────────┘ ``` -## PrettyCompact { #prettycompact} +## PrettyCompact {#prettycompact} Differs from [Pretty](#pretty) in that the grid is drawn between rows and the result is more compact. This format is used by default in the command-line client in interactive mode. -## PrettyCompactMonoBlock { #prettycompactmonoblock} +## PrettyCompactMonoBlock {#prettycompactmonoblock} Differs from [PrettyCompact](#prettycompact) in that up to 10,000 rows are buffered, then output as a single table, not by blocks. 
-## PrettyNoEscapes { #prettynoescapes} +## PrettyNoEscapes {#prettynoescapes} Differs from Pretty in that ANSI-escape sequences aren't used. This is necessary for displaying this format in a browser, as well as for using the 'watch' command-line utility. @@ -713,11 +713,11 @@ The same as the previous setting. The same as the previous setting. -## PrettySpace { #prettyspace} +## PrettySpace {#prettyspace} Differs from [PrettyCompact](#prettycompact) in that whitespace (space characters) is used instead of the grid. -## RowBinary { #rowbinary} +## RowBinary {#rowbinary} Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. This format is less efficient than the Native format, since it is row-based. @@ -732,7 +732,7 @@ Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia. For [NULL](../query_language/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../data_types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. -## RowBinaryWithNamesAndTypes { #rowbinarywithnamesandtypes} +## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes} Similar to [RowBinary](#rowbinary), but with added header: @@ -740,7 +740,7 @@ Similar to [RowBinary](#rowbinary), but with added header: * N `String`s specifying column names * N `String`s specifying column types -## Values { #data-format-values} +## Values {#data-format-values} Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`. @@ -750,7 +750,7 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also See also: [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) and [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#settings-input_format_values_deduce_templates_of_expressions) settings. -## Vertical { #vertical} +## Vertical {#vertical} Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows, if each row consists of a large number of columns. @@ -783,11 +783,11 @@ test: string with 'quotes' and with some special This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). -## VerticalRaw { #verticalraw} +## VerticalRaw {#verticalraw} Similar to [Vertical](#vertical), but with escaping disabled. This format is only suitable for outputting query results, not for parsing (receiving data and inserting it in the table). -## XML { #xml} +## XML {#xml} XML format is suitable only for output, not for parsing. Example: @@ -860,7 +860,7 @@ In string values, the characters `<` and `&` are escaped as `<` and `&`. Arrays are output as `HelloWorld...`,and tuples as `HelloWorld...`. 
-## CapnProto { #capnproto} +## CapnProto {#capnproto} Cap'n Proto is a binary message format similar to Protocol Buffers and Thrift, but not like JSON or MessagePack. @@ -883,7 +883,7 @@ Deserialization is effective and usually doesn't increase the system load. See also [Format Schema](#formatschema). -## Protobuf { #protobuf} +## Protobuf {#protobuf} Protobuf - is a [Protocol Buffers](https://developers.google.com/protocol-buffers/) format. @@ -950,7 +950,7 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). -## Avro { #data-format-avro} +## Avro {#data-format-avro} [Apache Avro](http://avro.apache.org/) is a row-oriented data serialization framework developed within Apache's Hadoop project. @@ -1014,7 +1014,7 @@ Column names must: Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively. -## AvroConfluent { #data-format-avro-confluent} +## AvroConfluent {#data-format-avro-confluent} AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html). @@ -1062,7 +1062,7 @@ SELECT * FROM topic1_stream; Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it's value after a restart. -## Parquet { #data-format-parquet} +## Parquet {#data-format-parquet} [Apache Parquet](http://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for this format. @@ -1110,7 +1110,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md). -## ORC { #data-format-orc} +## ORC {#data-format-orc} [Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse. @@ -1151,7 +1151,7 @@ $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT OR To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md). -## Format Schema { #formatschema} +## Format Schema {#formatschema} The file name containing the format schema is set by the setting `format_schema`. It's required to set this setting when it is used one of the formats `Cap'n Proto` and `Protobuf`. @@ -1170,7 +1170,7 @@ in the server configuration. [Original article](https://clickhouse.tech/docs/en/interfaces/formats/) -## Skipping Errors { #skippingerrors} +## Skipping Errors {#skippingerrors} Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. 
See [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and [input_format_allow_errors_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 2e4a08675cc..0ce700bdc54 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -1,4 +1,4 @@ -# HTTP Interface { #http_interface} +# HTTP Interface {#http_interface} The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. @@ -261,7 +261,7 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000& Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. -### Queries with Parameters { #cli-queries-with-parameters} +### Queries with Parameters {#cli-queries-with-parameters} You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters). diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index a52f0ebb750..9af7c9863dc 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -1,4 +1,4 @@ -# Interfaces { #interfaces} +# Interfaces {#interfaces} ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security): diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 660bb80a50c..454cdb9160d 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -1,4 +1,4 @@ -# MySQL interface { #mysql_interface} +# MySQL interface {#mysql_interface} ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../operations/server_settings/settings.md#server_settings-mysql_port) setting in configuration file: ```xml diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index d3208ad07e4..a01f524f3c7 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -101,7 +101,7 @@ Features: - Refactorings. - Search and Navigation. -### Yandex DataLens { #yandex-datalens} +### Yandex DataLens {#yandex-datalens} [Yandex DataLens](https://cloud.yandex.ru/services/datalens) is a service of data visualization and analytics. diff --git a/docs/en/operations/configuration_files.md b/docs/en/operations/configuration_files.md index 6723ef79e1a..69204506c2d 100644 --- a/docs/en/operations/configuration_files.md +++ b/docs/en/operations/configuration_files.md @@ -1,4 +1,4 @@ -# Configuration Files { #configuration_files} +# Configuration Files {#configuration_files} ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. 
diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index e6e1fb03cc7..608e7f39034 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -1,4 +1,4 @@ -# Quotas { #quotas} +# Quotas {#quotas} Quotas allow you to limit resource usage over a period of time, or simply track the use of resources. Quotas are set up in the user config. This is usually 'users.xml'. diff --git a/docs/en/operations/server_settings/index.md b/docs/en/operations/server_settings/index.md index 472fc41c3e6..39f3a5bb6d4 100644 --- a/docs/en/operations/server_settings/index.md +++ b/docs/en/operations/server_settings/index.md @@ -1,4 +1,4 @@ -# Server configuration parameters { #server_settings} +# Server configuration parameters {#server_settings} This section contains descriptions of server settings that cannot be changed at the session or query level. diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 1b53e8af162..1e48b374711 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -16,7 +16,7 @@ Default value: 3600. ``` -## compression { #server-settings-compression} +## compression {#server-settings-compression} Data compression settings for [MergeTree](../table_engines/mergetree.md)-engine tables. @@ -90,7 +90,7 @@ Settings profiles are located in the file specified in the parameter `user_confi ``` -## dictionaries_config { #server_settings-dictionaries_config} +## dictionaries_config {#server_settings-dictionaries_config} The path to the config file for external dictionaries. @@ -108,7 +108,7 @@ See also "[External dictionaries](../../query_language/dicts/external_dicts.md)" ``` -## dictionaries_lazy_load { #server_settings-dictionaries_lazy_load} +## dictionaries_lazy_load {#server_settings-dictionaries_lazy_load} Lazy loading of dictionaries. @@ -125,7 +125,7 @@ The default is `true`. ``` -## format_schema_path { #server_settings-format_schema_path} +## format_schema_path {#server_settings-format_schema_path} The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format. @@ -136,7 +136,7 @@ The path to the directory with the schemes for the input data, such as schemas f format_schemas/ ``` -## graphite { #server_settings-graphite} +## graphite {#server_settings-graphite} Sending data to [Graphite](https://github.com/graphite-project). @@ -171,7 +171,7 @@ You can configure multiple `` clauses. For instance, you can use this ``` -## graphite_rollup { #server_settings-graphite_rollup} +## graphite_rollup {#server_settings-graphite_rollup} Settings for thinning data for Graphite. @@ -215,7 +215,7 @@ If `http_port` is specified, the openSSL configuration is ignored even if it is ``` -## http_server_default_response { #server_settings-http_server_default_response} +## http_server_default_response {#server_settings-http_server_default_response} The page that is shown by default when you access the ClickHouse HTTP(s) server. Default value is "Ok." (with a line feed at the end) @@ -230,7 +230,7 @@ Opens `https://tabix.io/` when accessing ` http://localhost: http_port`. ``` -## include_from { #server_settings-include_from} +## include_from {#server_settings-include_from} The path to the file with substitutions. @@ -268,7 +268,7 @@ Useful for breaking away from a specific network interface. 
example.yandex.ru ``` -## interserver_http_credentials { #server-settings-interserver_http_credentials} +## interserver_http_credentials {#server-settings-interserver_http_credentials} The username and password used to authenticate during [replication](../table_engines/replication.md) with the Replicated* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. By default, the authentication is not used. @@ -299,7 +299,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing ``` -## listen_host { #server_settings-listen_host} +## listen_host {#server_settings-listen_host} Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. @@ -311,7 +311,7 @@ Examples: ``` -## logger { #server_settings-logger} +## logger {#server_settings-logger} Logging settings. @@ -374,7 +374,7 @@ For more information, see the section "[Creating replicated tables](../../operat ``` -## mark_cache_size { #server-mark-cache-size} +## mark_cache_size {#server-mark-cache-size} Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../table_engines/mergetree.md) family. @@ -443,7 +443,7 @@ The value 0 means that you can delete all tables without any restrictions. ``` -## merge_tree { #server_settings-merge_tree} +## merge_tree {#server_settings-merge_tree} Fine tuning for tables in the [MergeTree](../table_engines/mergetree.md). @@ -458,7 +458,7 @@ For more information, see the MergeTreeSettings.h header file. ``` -## openSSL { #server_settings-openssl} +## openSSL {#server_settings-openssl} SSL client/server configuration. @@ -518,7 +518,7 @@ Keys for server/client settings: ``` -## part_log { #server_settings-part-log} +## part_log {#server_settings-part-log} Logging events that are associated with [MergeTree](../table_engines/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. @@ -543,7 +543,7 @@ Use the following parameters to configure logging: ``` -## path { #server_settings-path} +## path {#server_settings-path} The path to the directory containing data. @@ -557,7 +557,7 @@ The path to the directory containing data. ``` -## query_log { #server_settings-query-log} +## query_log {#server_settings-query-log} Setting for logging queries received with the [log_queries=1](../settings/settings.md) setting. @@ -583,7 +583,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q ``` -## query_thread_log { #server_settings-query-thread-log} +## query_thread_log {#server_settings-query-thread-log} Setting for logging threads of queries received with the [log_query_threads=1](../settings/settings.md#settings-log-query-threads) setting. @@ -609,7 +609,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q ``` -## trace_log { #server_settings-trace_log} +## trace_log {#server_settings-trace_log} Settings for the [trace_log](../system_tables.md#system_tables-trace_log) system table operation. 
@@ -662,7 +662,7 @@ The masking rules are applied on whole query (to prevent leaks of sensitive data For distributed queries each server have to be configured separately, otherwise subquries passed to other nodes will be stored without masking. -## remote_servers { #server_settings_remote_servers} +## remote_servers {#server_settings_remote_servers} Configuration of clusters used by the [Distributed](../../operations/table_engines/distributed.md) table engine and by the `cluster` table function. @@ -678,7 +678,7 @@ For the value of the `incl` attribute, see the section "[Configuration files](.. - [skip_unavailable_shards](../settings/settings.md#settings-skip_unavailable_shards) -## timezone { #server_settings-timezone} +## timezone {#server_settings-timezone} The server's time zone. @@ -693,7 +693,7 @@ The time zone is necessary for conversions between String and DateTime formats w ``` -## tcp_port { #server_settings-tcp_port} +## tcp_port {#server_settings-tcp_port} Port for communicating with clients over the TCP protocol. @@ -703,7 +703,7 @@ Port for communicating with clients over the TCP protocol. 9000 ``` -## tcp_port_secure { #server_settings-tcp_port_secure} +## tcp_port_secure {#server_settings-tcp_port_secure} TCP port for secure communication with clients. Use it with [OpenSSL](#server_settings-openssl) settings. @@ -717,7 +717,7 @@ Positive integer. 9440 ``` -## mysql_port { #server_settings-mysql_port} +## mysql_port {#server_settings-mysql_port} Port for communicating with clients over MySQL protocol. @@ -731,7 +731,7 @@ Example 9004 ``` -## tmp_path { #server-settings-tmp_path} +## tmp_path {#server-settings-tmp_path} Path to temporary data for processing large queries. @@ -745,7 +745,7 @@ Path to temporary data for processing large queries. ``` -## tmp_policy { #server-settings-tmp_policy} +## tmp_policy {#server-settings-tmp_policy} Policy from [`storage_configuration`](../table_engines/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files. If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is ignored. @@ -756,7 +756,7 @@ If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is igno - `max_data_part_size_bytes` is ignored - you must have exactly one volume in that policy -## uncompressed_cache_size { #server-settings-uncompressed_cache_size} +## uncompressed_cache_size {#server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../table_engines/mergetree.md). @@ -770,7 +770,7 @@ The uncompressed cache is advantageous for very short queries in individual case 8589934592 ``` -## user_files_path { #server_settings-user_files_path} +## user_files_path {#server_settings-user_files_path} The directory with user files. Used in the table function [file()](../../query_language/table_functions/file.md). @@ -797,7 +797,7 @@ Path to the file that contains: ``` -## zookeeper { #server-settings_zookeeper} +## zookeeper {#server-settings_zookeeper} Contains settings that allow ClickHouse to interact with a [ZooKeeper](http://zookeeper.apache.org/) cluster. 
@@ -848,7 +848,7 @@ This section contains the following parameters: - [Replication](../../operations/table_engines/replication.md) - [ZooKeeper Programmer's Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) -## use_minimalistic_part_header_in_zookeeper { #server-settings-use_minimalistic_part_header_in_zookeeper} +## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} Storage method for data part headers in ZooKeeper. @@ -876,14 +876,14 @@ If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../table_e **Default value:** 0. -## disable_internal_dns_cache { #server-settings-disable_internal_dns_cache} +## disable_internal_dns_cache {#server-settings-disable_internal_dns_cache} Disables the internal DNS cache. Recommended for operating ClickHouse in systems with frequently changing infrastructure such as Kubernetes. **Default value:** 0. -## dns_cache_update_period { #server-settings-dns_cache_update_period} +## dns_cache_update_period {#server-settings-dns_cache_update_period} The period of updating IP addresses stored in the ClickHouse internal DNS cache (in seconds). The update is performed asynchronously, in a separate system thread. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 3f56eaaf99a..3dd02876955 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,4 +1,4 @@ -# Settings { #settings} +# Settings {#settings} There are multiple ways to make all the settings described below. Settings are configured in layers, so each subsequent layer redefines the previous settings. diff --git a/docs/en/operations/settings/permissions_for_queries.md b/docs/en/operations/settings/permissions_for_queries.md index 4c56dd93d12..e6dcd490e97 100644 --- a/docs/en/operations/settings/permissions_for_queries.md +++ b/docs/en/operations/settings/permissions_for_queries.md @@ -1,4 +1,4 @@ -# Permissions for queries { #permissions_for_queries} +# Permissions for queries {#permissions_for_queries} Queries in ClickHouse can be divided into several types: @@ -15,7 +15,7 @@ The following settings regulate user permissions by the type of query: `KILL QUERY` can be performed with any settings. -## readonly { #settings_readonly} +## readonly {#settings_readonly} Restricts permissions for read data, write data and change settings queries. @@ -36,7 +36,7 @@ from changing only specific settings, for details see [constraints on settings]( Default value: 0 -## allow_ddl { #settings_allow_ddl} +## allow_ddl {#settings_allow_ddl} Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries. diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 1d7ac4e5ae6..5b9db828d03 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -16,7 +16,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation ( `any (only for group_by_overflow_mode)` – Continuing aggregation for the keys that got into the set, but don't add new keys to the set. -## max_memory_usage { #settings_max_memory_usage} +## max_memory_usage {#settings_max_memory_usage} The maximum amount of RAM to use for running a query on a single server. 
@@ -64,7 +64,7 @@ Maximum number of bytes (uncompressed data) that can be read from a table when r What to do when the volume of data read exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_rows_to_group_by { #settings-max_rows_to_group_by} +## max_rows_to_group_by {#settings-max_rows_to_group_by} Maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating. @@ -73,7 +73,7 @@ Maximum number of unique keys received from aggregation. This setting lets you l What to do when the number of unique keys for aggregation exceeds the limit: 'throw', 'break', or 'any'. By default, throw. Using the 'any' value lets you run an approximation of GROUP BY. The quality of this approximation depends on the statistical nature of the data. -## max_bytes_before_external_group_by { #settings-max_bytes_before_external_group_by} +## max_bytes_before_external_group_by {#settings-max_bytes_before_external_group_by} Enables or disables execution of `GROUP BY` clauses in external memory. See [GROUP BY in external memory](../../query_language/select.md#select-group-by-in-external-memory). @@ -96,7 +96,7 @@ Maximum number of bytes before sorting. What to do if the number of rows received before sorting exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_result_rows { #setting-max_result_rows} +## max_result_rows {#setting-max_result_rows} Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query. @@ -223,7 +223,7 @@ Maximum number of bytes (uncompressed data) that can be passed to a remote serve What to do when the amount of data exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_rows_in_join { #settings-max_rows_in_join} +## max_rows_in_join {#settings-max_rows_in_join} Limits the number of rows in the hash table that is used when joining tables. @@ -240,7 +240,7 @@ Possible values: Default value: 0. -## max_bytes_in_join { #settings-max_bytes_in_join} +## max_bytes_in_join {#settings-max_bytes_in_join} Limits the size in bytes of the hash table used when joining tables. @@ -257,7 +257,7 @@ Possible values: Default value: 0. -## join_overflow_mode { #settings-join_overflow_mode} +## join_overflow_mode {#settings-join_overflow_mode} Defines what action ClickHouse performs when any of the following join limits is reached: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4bb08485208..b10553cbe77 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -45,7 +45,7 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes. -## fallback_to_stale_replicas_for_distributed_queries { #settings-fallback_to_stale_replicas_for_distributed_queries} +## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries} Forces a query to an out-of-date replica if updated data is not available. See [Replication](../table_engines/replication.md). @@ -55,7 +55,7 @@ Used when performing `SELECT` from a distributed table that points to replicated By default, 1 (enabled). 
-## force_index_by_date { #settings-force_index_by_date} +## force_index_by_date {#settings-force_index_by_date} Disables query execution if the index can't be used by date. @@ -82,7 +82,7 @@ Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/func It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed. -## enable_http_compression { #settings-enable_http_compression} +## enable_http_compression {#settings-enable_http_compression} Enables or disables data compression in the response to an HTTP request. @@ -95,7 +95,7 @@ Possible values: Default value: 0. -## http_zlib_compression_level { #settings-http_zlib_compression_level} +## http_zlib_compression_level {#settings-http_zlib_compression_level} Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression). @@ -104,7 +104,7 @@ Possible values: Numbers from 1 to 9. Default value: 3. -## http_native_compression_disable_checksumming_on_decompress { #settings-http_native_compression_disable_checksumming_on_decompress} +## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress} Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`). @@ -117,7 +117,7 @@ Possible values: Default value: 0. -## send_progress_in_http_headers { #settings-send_progress_in_http_headers} +## send_progress_in_http_headers {#settings-send_progress_in_http_headers} Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses. @@ -130,7 +130,7 @@ Possible values: Default value: 0. -## max_http_get_redirects { #setting-max_http_get_redirects} +## max_http_get_redirects {#setting-max_http_get_redirects} Limits the maximum number of HTTP GET redirect hops for [URL](../table_engines/url.md)-engine tables. The setting applies to both types of tables: those created by the [CREATE TABLE](../../query_language/create/#create-table-query) query and by the [url](../../query_language/table_functions/url.md) table function. @@ -141,7 +141,7 @@ Possible values: Default value: 0. -## input_format_allow_errors_num { #settings-input_format_allow_errors_num} +## input_format_allow_errors_num {#settings-input_format_allow_errors_num} Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). @@ -153,7 +153,7 @@ If an error occurred while reading rows but the error counter is still less than If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. -## input_format_allow_errors_ratio { #settings-input_format_allow_errors_ratio} +## input_format_allow_errors_ratio {#settings-input_format_allow_errors_ratio} Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). The percentage of errors is set as a floating-point number between 0 and 1. @@ -167,7 +167,7 @@ If an error occurred while reading rows but the error counter is still less than If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. 
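A minimal sketch of tolerating a few malformed rows during a text-format import; the target table is hypothetical and the thresholds are examples only:

```sql
SET input_format_allow_errors_num = 10;      -- absolute error budget
SET input_format_allow_errors_ratio = 0.01;  -- ...or 1% of the rows read

-- Broken rows are skipped while the counters stay within the limits;
-- once both limits are exceeded, an exception is thrown.
INSERT INTO imported_events FORMAT CSV       -- hypothetical table; data rows follow
1,"click","2020-03-01 10:00:00"
oops,"this malformed row counts against the error budget","n/a"
2,"view","2020-03-01 10:00:05"
```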
-## input_format_values_interpret_expressions { #settings-input_format_values_interpret_expressions} +## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Enables or disables the full SQL parser if the fast stream parser can't parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../query_language/syntax.md) section. @@ -217,7 +217,7 @@ INSERT INTO datetime_t SELECT now() Ok. ``` -## input_format_values_deduce_templates_of_expressions { #settings-input_format_values_deduce_templates_of_expressions} +## input_format_values_deduce_templates_of_expressions {#settings-input_format_values_deduce_templates_of_expressions} Enables or disables template deduction for an SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows to parse and interpret expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce template of an expression, parse the following rows using this template and evaluate the expression on batch of successfully parsed rows. For the following query: @@ -231,7 +231,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( Enabled by default. -## input_format_values_accurate_types_of_literals { #settings-input_format_values_accurate_types_of_literals} +## input_format_values_accurate_types_of_literals {#settings-input_format_values_accurate_types_of_literals} This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen, that expressions for some column have the same structure, but contain numeric literals of different types, e.g ```sql @@ -244,7 +244,7 @@ When this setting is enabled, ClickHouse will check actual type of literal and w When disabled, ClickHouse may use more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. Enabled by default. -## input_format_defaults_for_omitted_fields { #session_settings-input_format_defaults_for_omitted_fields} +## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields} When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats. @@ -258,18 +258,18 @@ Possible values: Default value: 1. -## input_format_tsv_empty_as_default { #settings-input_format_tsv_empty_as_default} +## input_format_tsv_empty_as_default {#settings-input_format_tsv_empty_as_default} When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. Disabled by default. -## input_format_null_as_default { #settings-input_format_null_as_default} +## input_format_null_as_default {#settings-input_format_null_as_default} Enables or disables using default values if input data contain `NULL`, but data type of corresponding column in not `Nullable(T)` (for text input formats). 
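A short sketch combining the insert-time default-substitution settings above; whether empty or `NULL` fields are actually replaced depends on the target columns having usable default expressions:

```sql
SET input_format_defaults_for_omitted_fields = 1;  -- omitted columns get their DEFAULT expressions
SET input_format_tsv_empty_as_default = 1;         -- empty TSV fields fall back to column defaults
SET input_format_null_as_default = 1;              -- NULL into a non-Nullable column becomes the default
```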
-## input_format_skip_unknown_fields { #settings-input_format_skip_unknown_fields} +## input_format_skip_unknown_fields {#settings-input_format_skip_unknown_fields} Enables or disables skipping insertion of extra data. @@ -289,7 +289,7 @@ Possible values: Default value: 0. -## input_format_import_nested_json { #settings-input_format_import_nested_json} +## input_format_import_nested_json {#settings-input_format_import_nested_json} Enables or disables the insertion of JSON data with nested objects. @@ -308,7 +308,7 @@ See also: - [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format. -## input_format_with_names_use_header { #settings-input_format_with_names_use_header} +## input_format_with_names_use_header {#settings-input_format_with_names_use_header} Enables or disables checking the column order when inserting data. @@ -326,7 +326,7 @@ Possible values: Default value: 1. -## date_time_input_format { #settings-date_time_input_format} +## date_time_input_format {#settings-date_time_input_format} Allows to choose a parser of text representation of date and time. @@ -349,7 +349,7 @@ See also: - [DateTime data type.](../../data_types/datetime.md) - [Functions for working with dates and times.](../../query_language/functions/date_time_functions.md) -## join_default_strictness { #settings-join_default_strictness} +## join_default_strictness {#settings-join_default_strictness} Sets default strictness for [JOIN clauses](../../query_language/select.md#select-join). @@ -362,7 +362,7 @@ Possible values: Default value: `ALL`. -## join_any_take_last_row { #settings-join_any_take_last_row} +## join_any_take_last_row {#settings-join_any_take_last_row} Changes behavior of join operations with `ANY` strictness. @@ -382,7 +382,7 @@ See also: - [Join table engine](../table_engines/join.md) - [join_default_strictness](#settings-join_default_strictness) -## join_use_nulls { #join_use_nulls} +## join_use_nulls {#join_use_nulls} Sets the type of [JOIN](../../query_language/select.md) behavior. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting. @@ -393,7 +393,7 @@ Possible values: Default value: 0. -## max_block_size { #setting-max_block_size} +## max_block_size {#setting-max_block_size} In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of block (in number of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large, so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads, and to preserve at least some cache locality. @@ -407,7 +407,7 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block However, the block size cannot be more than `max_block_size` rows. By default: 1,000,000. It only works when reading from MergeTree engines. 
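Returning to the join-related settings above, a minimal sketch of how they change an otherwise unqualified `JOIN` (the tables are hypothetical):

```sql
SET join_default_strictness = 'ANY';  -- a bare JOIN now behaves like ANY JOIN
SET join_use_nulls = 1;               -- unmatched cells become NULL instead of type defaults

SELECT u.id, u.name, o.amount
FROM users AS u                       -- hypothetical tables
LEFT JOIN orders AS o USING (id);
```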
-## merge_tree_min_rows_for_concurrent_read { #setting-merge_tree_min_rows_for_concurrent_read} +## merge_tree_min_rows_for_concurrent_read {#setting-merge_tree_min_rows_for_concurrent_read} If the number of rows to be read from a file of a [MergeTree](../table_engines/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads. @@ -417,7 +417,7 @@ Possible values: Default value: 163840. -## merge_tree_min_bytes_for_concurrent_read { #setting-merge_tree_min_bytes_for_concurrent_read} +## merge_tree_min_bytes_for_concurrent_read {#setting-merge_tree_min_bytes_for_concurrent_read} If the number of bytes to read from one file of a [MergeTree](../table_engines/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads. @@ -427,7 +427,7 @@ Possible value: Default value: 251658240. -## merge_tree_min_rows_for_seek { #setting-merge_tree_min_rows_for_seek} +## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek} If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file, but reads the data sequentially. @@ -437,7 +437,7 @@ Possible values: Default value: 0. -## merge_tree_min_bytes_for_seek { #setting-merge_tree_min_bytes_for_seek} +## merge_tree_min_bytes_for_seek {#setting-merge_tree_min_bytes_for_seek} If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads range of file that contains both blocks, thus avoiding extra seek. @@ -448,7 +448,7 @@ Possible values: Default value: 0. -## merge_tree_coarse_index_granularity { #setting-merge_tree_coarse_index_granularity} +## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity} When searching data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively. @@ -458,7 +458,7 @@ Possible values: Default value: 8. -## merge_tree_max_rows_to_use_cache { #setting-merge_tree_max_rows_to_use_cache} +## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache} If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn't use the cache of uncompressed blocks. @@ -470,7 +470,7 @@ Possible values: Default value: 128 ✕ 8192. -## merge_tree_max_bytes_to_use_cache { #setting-merge_tree_max_bytes_to_use_cache} +## merge_tree_max_bytes_to_use_cache {#setting-merge_tree_max_bytes_to_use_cache} If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn't use the cache of uncompressed blocks. @@ -482,7 +482,7 @@ Possible value: Default value: 2013265920. -## min_bytes_to_use_direct_io { #settings-min_bytes_to_use_direct_io} +## min_bytes_to_use_direct_io {#settings-min_bytes_to_use_direct_io} The minimum data volume required for using direct I/O access to the storage disk. @@ -495,7 +495,7 @@ Possible values: Default value: 0. -## log_queries { #settings-log-queries} +## log_queries {#settings-log-queries} Setting up query logging. 
@@ -507,7 +507,7 @@ Example: log_queries=1 ``` -## log_query_threads { #settings-log-query-threads} +## log_query_threads {#settings-log-query-threads} Setting up query threads logging. @@ -519,7 +519,7 @@ Example: log_query_threads=1 ``` -## max_insert_block_size { #settings-max_insert_block_size} +## max_insert_block_size {#settings-max_insert_block_size} The size of blocks to form for insertion into a table. This setting only applies in cases when the server forms the blocks. @@ -531,7 +531,7 @@ Default value: 1,048,576. The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. -## max_replica_delay_for_distributed_queries { #settings-max_replica_delay_for_distributed_queries} +## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries} Disables lagging replicas for distributed queries. See [Replication](../../operations/table_engines/replication.md). @@ -541,7 +541,7 @@ Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. -## max_threads { #settings-max_threads} +## max_threads {#settings-max_threads} The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the 'max_distributed_connections' parameter). @@ -556,7 +556,7 @@ For queries that are completed quickly because of a LIMIT, you can set a lower ' The smaller the `max_threads` value, the less memory is consumed. -## max_insert_threads { #settings-max_insert_threads} +## max_insert_threads {#settings-max_insert_threads} The maximum number of threads to execute the `INSERT SELECT` query. @@ -590,7 +590,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v There usually isn't any reason to change this setting. -## max_query_size { #settings-max_query_size} +## max_query_size {#settings-max_query_size} The maximum part of a query that can be taken to RAM for parsing with the SQL parser. The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction. @@ -654,7 +654,7 @@ Default value: 3. Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). For more information, see the section "Extreme values". -## use_uncompressed_cache { #setting-use_uncompressed_cache} +## use_uncompressed_cache {#setting-use_uncompressed_cache} Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. @@ -682,7 +682,7 @@ The default value is 7500. The smaller the value, the more often data is flushed into the table. 
Setting the value too low leads to poor performance. -## load_balancing { #settings-load_balancing} +## load_balancing {#settings-load_balancing} Specifies the algorithm of replicas selection that is used for distributed query processing. @@ -693,7 +693,7 @@ ClickHouse supports the following algorithms of choosing replicas: - [In order](#load_balancing-in_order) - [First or random](#load_balancing-first_or_random) -### Random (by default) { #load_balancing-random} +### Random (by default) {#load_balancing-random} ```sql load_balancing = random @@ -702,7 +702,7 @@ load_balancing = random The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them. Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data. -### Nearest Hostname { #load_balancing-nearest_hostname} +### Nearest Hostname {#load_balancing-nearest_hostname} ```sql load_balancing = nearest_hostname @@ -716,7 +716,7 @@ This method might seem primitive, but it doesn't require external data about net Thus, if there are equivalent replicas, the closest one by name is preferred. We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results. -### In Order { #load_balancing-in_order} +### In Order {#load_balancing-in_order} ```sql load_balancing = in_order @@ -726,7 +726,7 @@ Replicas with the same number of errors are accessed in the same order as they a This method is appropriate when you know exactly which replica is preferable. -### First or Random { #load_balancing-first_or_random} +### First or Random {#load_balancing-first_or_random} ```sql load_balancing = first_or_random @@ -736,7 +736,7 @@ This algorithm chooses the first replica in the set or a random replica if the f The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, load is evenly distributed among replicas that are still available. -## prefer_localhost_replica { #settings-prefer_localhost_replica} +## prefer_localhost_replica {#settings-prefer_localhost_replica} Enables/disables preferable using the localhost replica when processing distributed queries. @@ -760,7 +760,7 @@ See the section "WITH TOTALS modifier". The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". -## max_parallel_replicas { #settings-max_parallel_replicas} +## max_parallel_replicas {#settings-max_parallel_replicas} The maximum number of replicas for each shard when executing a query. For consistency (to get different parts of the same data split), this option only works when the sampling key is set. @@ -782,27 +782,27 @@ If the value is 1 or more, compilation occurs asynchronously in a separate threa Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. 
Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. -## output_format_json_quote_64bit_integers { #session_settings-output_format_json_quote_64bit_integers} +## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. -## format_csv_delimiter { #settings-format_csv_delimiter} +## format_csv_delimiter {#settings-format_csv_delimiter} The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. -## input_format_csv_unquoted_null_literal_as_null { #settings-input_format_csv_unquoted_null_literal_as_null} +## input_format_csv_unquoted_null_literal_as_null {#settings-input_format_csv_unquoted_null_literal_as_null} For CSV input format enables or disables parsing of unquoted `NULL` as literal (synonym for `\N`). -## output_format_csv_crlf_end_of_line { #settings-output_format_csv_crlf_end_of_line} +## output_format_csv_crlf_end_of_line {#settings-output_format_csv_crlf_end_of_line} Use DOS/Windows style line separator (CRLF) in CSV instead of Unix style (LF). -## output_format_tsv_crlf_end_of_line { #settings-output_format_tsv_crlf_end_of_line} +## output_format_tsv_crlf_end_of_line {#settings-output_format_tsv_crlf_end_of_line} Use DOC/Windows style line separator (CRLF) in TSV instead of Unix style (LF). -## insert_quorum { #settings-insert_quorum} +## insert_quorum {#settings-insert_quorum} Enables quorum writes. @@ -829,7 +829,7 @@ See also: - [insert_quorum_timeout](#settings-insert_quorum_timeout) - [select_sequential_consistency](#settings-select_sequential_consistency) -## insert_quorum_timeout { #settings-insert_quorum_timeout} +## insert_quorum_timeout {#settings-insert_quorum_timeout} Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. @@ -841,7 +841,7 @@ See also: - [select_sequential_consistency](#settings-select_sequential_consistency) -## select_sequential_consistency { #settings-select_sequential_consistency} +## select_sequential_consistency {#settings-select_sequential_consistency} Enables or disables sequential consistency for `SELECT` queries: @@ -861,7 +861,7 @@ See also: - [insert_quorum](#settings-insert_quorum) - [insert_quorum_timeout](#settings-insert_quorum_timeout) -## insert_deduplicate { #settings-insert_deduplicate} +## insert_deduplicate {#settings-insert_deduplicate} Enables or disables block deduplication of `INSERT` (for Replicated* tables). @@ -874,7 +874,7 @@ Default value: 1. By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication] (../ table_engines/replication.md). -## deduplicate_blocks_in_dependent_materialized_views { #settings-deduplicate_blocks_in_dependent_materialized_views} +## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate_blocks_in_dependent_materialized_views} Enables or disables the deduplication check for materialized views that receive data from Replicated* tables. @@ -892,7 +892,7 @@ If an INSERTed block is skipped due to deduplication in the source table, there At the same time, this behavior "breaks" `INSERT` idempotency. 
If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows to change this behavior. On retry a materialized view will receive the repeat insert and will perform deduplication check by itself, ignoring check result for the source table, and will insert rows lost because of first failure. -## max_network_bytes { #settings-max_network_bytes} +## max_network_bytes {#settings-max_network_bytes} Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. Possible values: @@ -902,7 +902,7 @@ Possible values: Default value: 0. -## max_network_bandwidth { #settings-max_network_bandwidth} +## max_network_bandwidth {#settings-max_network_bandwidth} Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query. @@ -913,7 +913,7 @@ Possible values: Default value: 0. -## max_network_bandwidth_for_user { #settings-max_network_bandwidth_for_user} +## max_network_bandwidth_for_user {#settings-max_network_bandwidth_for_user} Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user. @@ -924,7 +924,7 @@ Possible values: Default value: 0. -## max_network_bandwidth_for_all_users { #settings-max_network_bandwidth_for_all_users} +## max_network_bandwidth_for_all_users {#settings-max_network_bandwidth_for_all_users} Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server. @@ -935,7 +935,7 @@ Possible values: Default value: 0. -## count_distinct_implementation { #settings-count_distinct_implementation} +## count_distinct_implementation {#settings-count_distinct_implementation} Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../query_language/agg_functions/reference.md#agg_function-count) construction. @@ -949,7 +949,7 @@ Possible values: Default value: `uniqExact`. -## skip_unavailable_shards { #settings-skip_unavailable_shards} +## skip_unavailable_shards {#settings-skip_unavailable_shards} Enables or disables silently skipping of unavailable shards. @@ -979,13 +979,13 @@ Possible values: Default value: 0. -## optimize_skip_unused_shards { #settings-optimize_skip_unused_shards} +## optimize_skip_unused_shards {#settings-optimize_skip_unused_shards} Enables or disables skipping of unused shards for SELECT queries that has sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by sharding key, otherwise do nothing). Default value: 0 -## force_optimize_skip_unused_shards { #settings-force_optimize_skip_unused_shards} +## force_optimize_skip_unused_shards {#settings-force_optimize_skip_unused_shards} Enables or disables query execution if [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled exception will be thrown. 
@@ -997,7 +997,7 @@ Possible values: Default value: 0 -## optimize_throw_if_noop { #setting-optimize_throw_if_noop} +## optimize_throw_if_noop {#setting-optimize_throw_if_noop} Enables or disables throwing an exception if an [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query didn't perform a merge. @@ -1011,7 +1011,7 @@ Possible values: Default value: 0. -## distributed_replica_error_half_life { #settings-distributed_replica_error_half_life} +## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - Type: seconds - Default value: 60 seconds @@ -1023,7 +1023,7 @@ See also: - [Table engine Distributed](../../operations/table_engines/distributed.md) - [distributed_replica_error_cap](#settings-distributed_replica_error_cap) -## distributed_replica_error_cap { #settings-distributed_replica_error_cap} +## distributed_replica_error_cap {#settings-distributed_replica_error_cap} - Type: unsigned int - Default value: 1000 @@ -1035,7 +1035,7 @@ See also: - [Table engine Distributed](../../operations/table_engines/distributed.md) - [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) -## distributed_directory_monitor_sleep_time_ms { #distributed_directory_monitor_sleep_time_ms} +## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms} Base interval for the [Distributed](../table_engines/distributed.md) table engine to send data. The actual interval grows exponentially in the event of errors. @@ -1045,7 +1045,7 @@ Possible values: Default value: 100 milliseconds. -## distributed_directory_monitor_max_sleep_time_ms { #distributed_directory_monitor_max_sleep_time_ms} +## distributed_directory_monitor_max_sleep_time_ms {#distributed_directory_monitor_max_sleep_time_ms} Maximum interval for the [Distributed](../table_engines/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed_directory_monitor_sleep_time_ms](#distributed_directory_monitor_sleep_time_ms) setting. @@ -1055,7 +1055,7 @@ Possible values: Default value: 30000 milliseconds (30 seconds). -## distributed_directory_monitor_batch_inserts { #distributed_directory_monitor_batch_inserts} +## distributed_directory_monitor_batch_inserts {#distributed_directory_monitor_batch_inserts} Enables/disables sending of inserted data in batches. @@ -1068,7 +1068,7 @@ Possible values: Default value: 0. -## os_thread_priority { #setting-os_thread_priority} +## os_thread_priority {#setting-os_thread_priority} Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. @@ -1083,7 +1083,7 @@ Lower values mean higher priority. Threads with low `nice` priority values are e Default value: 0. -## query_profiler_real_time_period_ns { #query_profiler_real_time_period_ns} +## query_profiler_real_time_period_ns {#query_profiler_real_time_period_ns} Sets the period for a real clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). Real clock timer counts wall-clock time. 
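A small sketch of enabling the sampling profiler for one session, assuming the `trace_log` section is enabled in the server configuration; the 10 ms period is only an example, and the collected samples land in the `system.trace_log` table referenced below:

```sql
SET query_profiler_real_time_period_ns = 10000000;  -- take a wall-clock sample every 10 ms

-- run the query to be profiled ...
-- ... then inspect the collected stack traces
SELECT count() AS samples
FROM system.trace_log
WHERE query_id = 'acc4d61f-5bd1-4a3e-bc91-2180be37c915';  -- example query_id
```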
@@ -1106,7 +1106,7 @@ See also: - System table [trace_log](../system_tables.md#system_tables-trace_log) -## query_profiler_cpu_time_period_ns { #query_profiler_cpu_time_period_ns} +## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns} Sets the period for a CPU clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). This timer counts only CPU time. @@ -1129,7 +1129,7 @@ See also: - System table [trace_log](../system_tables.md#system_tables-trace_log) -## allow_introspection_functions { #settings-allow_introspection_functions} +## allow_introspection_functions {#settings-allow_introspection_functions} Enables of disables [introspections functions](../../query_language/functions/introspection.md) for query profiling. @@ -1159,7 +1159,7 @@ Enable order-preserving parallel parsing of data formats. Supported only for TSV The minimum chunk size in bytes, which each thread will parse in parallel. -## output_format_avro_codec { #settings-output_format_avro_codec} +## output_format_avro_codec {#settings-output_format_avro_codec} Sets the compression codec used for output Avro file. @@ -1173,7 +1173,7 @@ Possible values: Default value: `snappy` (if available) or `deflate`. -## output_format_avro_sync_interval { #settings-output_format_avro_sync_interval} +## output_format_avro_sync_interval {#settings-output_format_avro_sync_interval} Sets minimum data size (in bytes) between synchronization markers for output Avro file. @@ -1183,7 +1183,7 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) -## format_avro_schema_registry_url { #settings-format_avro_schema_registry_url} +## format_avro_schema_registry_url {#settings-format_avro_schema_registry_url} Sets Confluent Schema Registry URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 763b43b1fbf..0eb1b8d67f6 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -6,7 +6,7 @@ System tables don't have files with data on the disk or files with metadata. The System tables are read-only. They are located in the 'system' database. -## system.asynchronous_metrics { #system_tables-asynchronous_metrics} +## system.asynchronous_metrics {#system_tables-asynchronous_metrics} Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use. @@ -92,7 +92,7 @@ The `system.columns` table contains the following columns (the column type is sh - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. -## system.contributors { #system_contributors} +## system.contributors {#system_contributors} Contains information about contributors. All constributors in random order. The order is random at query execution time. @@ -138,7 +138,7 @@ This table contains a single String column called 'name' – the name of a datab Each database that the server knows about has a corresponding entry in the table. This system table is used for implementing the `SHOW DATABASES` query. -## system.detached_parts { #system_tables-detached_parts} +## system.detached_parts {#system_tables-detached_parts} Contains information about detached parts of [MergeTree](table_engines/mergetree.md) tables. 
The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION|PART](../query_language/query_language/alter/#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached). @@ -164,7 +164,7 @@ Columns: Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. -## system.events { #system_tables-events} +## system.events {#system_tables-events} Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started. @@ -243,7 +243,7 @@ Columns: - `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. - `rows_written` (UInt64) — Number of rows written. -## system.metrics { #system_tables-metrics} +## system.metrics {#system_tables-metrics} Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date. @@ -283,7 +283,7 @@ SELECT * FROM system.metrics LIMIT 10 - [system.metric_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring. -## system.metric_log { #system_tables-metric_log} +## system.metric_log {#system_tables-metric_log} Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. To turn on metrics history collection on `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with following content: @@ -356,7 +356,7 @@ This table contains a single row with a single 'dummy' UInt8 column containing t This table is used if a SELECT query doesn't specify the FROM clause. This is similar to the DUAL table found in other DBMSs. -## system.parts { #system_tables-parts} +## system.parts {#system_tables-parts} Contains information about parts of [MergeTree](table_engines/mergetree.md) tables. @@ -406,7 +406,7 @@ Columns: - `marks_size` (`UInt64`) – Alias for `marks_bytes`. -## system.part_log { #system_tables-part-log} +## system.part_log {#system_tables-part-log} The `system.part_log` table is created only if the [part_log](server_settings/settings.md#server_settings-part-log) server setting is specified. @@ -439,7 +439,7 @@ The `system.part_log` table contains the following columns: The `system.part_log` table is created after the first inserting data to the `MergeTree` table. -## system.processes { #system_tables-processes} +## system.processes {#system_tables-processes} This system table is used for implementing the `SHOW PROCESSLIST` query. @@ -455,7 +455,7 @@ Columns: - `query` (String) – The query text. For `INSERT`, it doesn't include the data to insert. - `query_id` (String) – Query ID, if defined. -## system.text_log { #system_tables-text_log} +## system.text_log {#system_tables-text_log} Contains logging entries. 
Logging level which goes to this table can be limited with `text_log.level` server setting. @@ -483,7 +483,7 @@ Columns: - `source_line` (`UInt64`) - Source line from which the logging was done. -## system.query_log { #system_tables-query_log} +## system.query_log {#system_tables-query_log} Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information. @@ -569,7 +569,7 @@ When the table is deleted manually, it will be automatically created on the fly. You can specify an arbitrary partitioning key for the `system.query_log` table in the [query_log](server_settings/settings.md#server_settings-query-log) server setting (see the `partition_by` parameter). -## system.query_thread_log { #system_tables-query-thread-log} +## system.query_thread_log {#system_tables-query-thread-log} The table contains information about each query execution thread. @@ -634,7 +634,7 @@ When the table is deleted manually, it will be automatically created on the fly. You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `partition_by` parameter). -## system.trace_log { #system_tables-trace_log} +## system.trace_log {#system_tables-trace_log} Contains stack traces collected by the sampling query profiler. @@ -677,7 +677,7 @@ query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] ``` -## system.replicas { #system_tables-replicas} +## system.replicas {#system_tables-replicas} Contains information and status for replicated tables residing on the local server. This table can be used for monitoring. The table contains a row for every Replicated\* table. @@ -960,7 +960,7 @@ pzxid: 987021252247 path: /clickhouse/tables/01-08/visits/replicas ``` -## system.mutations { #system_tables-mutations} +## system.mutations {#system_tables-mutations} The table contains information about [mutations](../query_language/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns: @@ -987,7 +987,7 @@ If there were problems with mutating some parts, the following columns contain a **latest_fail_reason** - The exception message that caused the most recent part mutation failure. -## system.disks { #system_tables-disks} +## system.disks {#system_tables-disks} Contains information about disks defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure). @@ -1000,7 +1000,7 @@ Columns: - `keep_free_space` ([UInt64](../data_types/int_uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. -## system.storage_policies { #system_tables-storage_policies} +## system.storage_policies {#system_tables-storage_policies} Contains information about storage policies and volumes defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure). 
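A couple of illustrative queries against the disk and storage-policy tables described above; the column lists are abbreviated, and a server with no extra storage configuration reports only the implicit `default` disk and policy:

```sql
SELECT
    name,
    path,
    formatReadableSize(free_space)  AS free,
    formatReadableSize(total_space) AS total
FROM system.disks;

SELECT policy_name, volume_name, disks
FROM system.storage_policies;
```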
diff --git a/docs/en/operations/table_engines/collapsingmergetree.md b/docs/en/operations/table_engines/collapsingmergetree.md index 2c1f2b96a3c..9c50dd959ed 100644 --- a/docs/en/operations/table_engines/collapsingmergetree.md +++ b/docs/en/operations/table_engines/collapsingmergetree.md @@ -1,4 +1,4 @@ -# CollapsingMergeTree { #table_engine-collapsingmergetree} +# CollapsingMergeTree {#table_engine-collapsingmergetree} The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. @@ -55,7 +55,7 @@ All of the parameters excepting `sign` have the same meaning as in `MergeTree`. -## Collapsing { #table_engine-collapsingmergetree-collapsing} +## Collapsing {#table_engine-collapsingmergetree-collapsing} ### Data @@ -103,7 +103,7 @@ Why we need 2 rows for each change read in the [Algorithm](#table_engine-collaps 2. Long growing arrays in columns reduce the efficiency of the engine due to load for writing. The more straightforward data, the higher efficiency. 3. The `SELECT` results depend strongly on the consistency of object changes history. Be accurate when preparing data for inserting. You can get unpredictable results in inconsistent data, for example, negative values for non-negative metrics such as session depth. -### Algorithm { #table_engine-collapsingmergetree-collapsing-algorithm} +### Algorithm {#table_engine-collapsingmergetree-collapsing-algorithm} When ClickHouse merges data parts, each group of consecutive rows with the same sorting key (`ORDER BY`) is reduced to not more than two rows, one with `Sign = 1` ("state" row) and another with `Sign = -1` ("cancel" row). In other words, entries collapse. diff --git a/docs/en/operations/table_engines/file.md b/docs/en/operations/table_engines/file.md index 8a0745b7635..d45eb596ea1 100644 --- a/docs/en/operations/table_engines/file.md +++ b/docs/en/operations/table_engines/file.md @@ -1,4 +1,4 @@ -# File { #table_engines-file} +# File {#table_engines-file} The File table engine keeps the data in a file in one of the supported [file formats](../../interfaces/formats.md#formats) (TabSeparated, Native, etc.). diff --git a/docs/en/operations/table_engines/generate.md b/docs/en/operations/table_engines/generate.md index 5fc176a5c65..fd98b3c9d18 100644 --- a/docs/en/operations/table_engines/generate.md +++ b/docs/en/operations/table_engines/generate.md @@ -1,4 +1,4 @@ -# GenerateRandom { #table_engines-generate} +# GenerateRandom {#table_engines-generate} The GenerateRandom table engine produces random data for given table schema. diff --git a/docs/en/operations/table_engines/graphitemergetree.md b/docs/en/operations/table_engines/graphitemergetree.md index 30aedff7979..c260acca315 100644 --- a/docs/en/operations/table_engines/graphitemergetree.md +++ b/docs/en/operations/table_engines/graphitemergetree.md @@ -1,4 +1,4 @@ -# GraphiteMergeTree { #graphitemergetree} +# GraphiteMergeTree {#graphitemergetree} This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite. @@ -6,7 +6,7 @@ You can use any ClickHouse table engine to store the Graphite data if you don't The engine inherits properties from [MergeTree](mergetree.md). 
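As a sketch of the collapsing scheme described for `CollapsingMergeTree` above, the following assumes a table `UAct(UserID UInt64, PageViews UInt8, Duration UInt8, Sign Int8)` like the one used in that section; the first insert records a state row, the second cancels it and writes the updated state:

```sql
-- "state" row for a user session
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, 1);

-- later: cancel the old state and record the new one in a single insert
INSERT INTO UAct VALUES
    (4324182021466249494, 5, 146, -1),   -- Sign = -1 cancels the earlier row
    (4324182021466249494, 6, 185, 1);    -- new "state" row
```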
-## Creating a Table { #creating-table} +## Creating a Table {#creating-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -66,7 +66,7 @@ All of the parameters excepting `config_section` have the same meaning as in `Me - `config_section` — Name of the section in the configuration file, where are the rules of rollup set. -## Rollup configuration { #rollup-configuration} +## Rollup configuration {#rollup-configuration} The settings for rollup are defined by the [graphite_rollup](../server_settings/settings.md#server_settings-graphite_rollup) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables. @@ -77,14 +77,14 @@ required-columns patterns ``` -### Required Columns { #required-columns} +### Required Columns {#required-columns} - `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`. - `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`. - `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`. - `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`. -### Patterns { #patterns} +### Patterns {#patterns} Structure of the `patterns` section: @@ -126,7 +126,7 @@ Fields for `pattern` and `default` sections: - `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. -### Configuration Example { #configuration-example} +### Configuration Example {#configuration-example} ```xml diff --git a/docs/en/operations/table_engines/hdfs.md b/docs/en/operations/table_engines/hdfs.md index 56a08daefad..fb254f11112 100644 --- a/docs/en/operations/table_engines/hdfs.md +++ b/docs/en/operations/table_engines/hdfs.md @@ -1,4 +1,4 @@ -# HDFS { #table_engines-hdfs} +# HDFS {#table_engines-hdfs} This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar to the [File](file.md) and [URL](url.md) engines, but provides Hadoop-specific features. diff --git a/docs/en/operations/table_engines/index.md b/docs/en/operations/table_engines/index.md index f47457012d7..4ee5eb249e4 100644 --- a/docs/en/operations/table_engines/index.md +++ b/docs/en/operations/table_engines/index.md @@ -1,4 +1,4 @@ -# Table engines { #table_engines} +# Table engines {#table_engines} The table engine (type of table) determines: @@ -64,7 +64,7 @@ Engines in the family: - [Memory](memory.md) - [Buffer](buffer.md) -## Virtual columns { #table_engines-virtual_columns} +## Virtual columns {#table_engines-virtual_columns} Virtual column is an integral table engine attribute that is defined in the engine source code. diff --git a/docs/en/operations/table_engines/jdbc.md b/docs/en/operations/table_engines/jdbc.md index 7035e6469e2..2f87cd7794e 100644 --- a/docs/en/operations/table_engines/jdbc.md +++ b/docs/en/operations/table_engines/jdbc.md @@ -1,4 +1,4 @@ -# JDBC { #table_engine-jdbc} +# JDBC {#table_engine-jdbc} Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). 
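A minimal sketch of a JDBC-backed table, assuming a running `clickhouse-jdbc-bridge` and an external MySQL database named `test` containing a table `external_table`:

```sql
CREATE TABLE jdbc_mysql_table
(
    int_id      Int32,
    float_value Nullable(Float32)
)
ENGINE = JDBC('jdbc:mysql://localhost:3306/?user=root&password=root', 'test', 'external_table');
```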
diff --git a/docs/en/operations/table_engines/join.md b/docs/en/operations/table_engines/join.md index 927ede7e34a..4b74ac5b2af 100644 --- a/docs/en/operations/table_engines/join.md +++ b/docs/en/operations/table_engines/join.md @@ -2,7 +2,7 @@ Prepared data structure for using in [JOIN](../../query_language/select.md#select-join) operations. -## Creating a Table { #creating-a-table} +## Creating a Table {#creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -77,7 +77,7 @@ You cannot perform a `SELECT` query directly from the table. Instead, use one of - Place the table to the right side in a `JOIN` clause. - Call the [joinGet](../../query_language/functions/other_functions.md#joinget) function, which lets you extract data from the table the same way as from a dictionary. -### Limitations and Settings { #join-limitations-and-settings} +### Limitations and Settings {#join-limitations-and-settings} When creating a table, the following settings are applied: diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index d695ae8c745..5731d68a61d 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -9,7 +9,7 @@ Kafka lets you: - Process streams as they become available. -## Creating a Table { #table_engine-kafka-creating-a-table} +## Creating a Table {#table_engine-kafka-creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 3cc57327580..e80de9def1c 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -1,4 +1,4 @@ -# MergeTree { #table_engines-mergetree} +# MergeTree {#table_engines-mergetree} The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines. @@ -26,7 +26,7 @@ Main features: The [Merge](merge.md) engine does not belong to the `*MergeTree` family. -## Creating a Table { #table_engine-mergetree-creating-a-table} +## Creating a Table {#table_engine-mergetree-creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -50,7 +50,7 @@ For a description of parameters, see the [CREATE query description](../../query_ !!!note "Note" `INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes). -### Query Clauses { #mergetree-query-clauses} +### Query Clauses {#mergetree-query-clauses} - `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. @@ -134,7 +134,7 @@ MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID) The `MergeTree` engine is configured in the same way as in the example above for the main engine configuration method. -## Data Storage { #mergetree-data-storage} +## Data Storage {#mergetree-data-storage} A table consists of data parts sorted by primary key. @@ -146,7 +146,7 @@ Each data part is logically divided into granules. A granule is the smallest ind The granule size is restricted by the `index_granularity` and `index_granularity_bytes` settings of the table engine. The number of rows in a granule lays in the `[1, index_granularity]` range, depending on the size of the rows. The size of a granule can exceed `index_granularity_bytes` if the size of a single row is greater than the value of the setting. 
In this case, the size of the granule equals the size of the row. -## Primary Keys and Indexes in Queries { #primary-keys-and-indexes-in-queries} +## Primary Keys and Indexes in Queries {#primary-keys-and-indexes-in-queries} Take the `(CounterID, Date)` primary key as an example. In this case, the sorting and index can be illustrated as follows: @@ -248,7 +248,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence. -### Data Skipping Indexes (Experimental) { #table_engine-mergetree-data_skipping-indexes} +### Data Skipping Indexes (Experimental) {#table_engine-mergetree-data_skipping-indexes} The index declaration is in the columns section of the `CREATE` query. ```sql @@ -368,7 +368,7 @@ For concurrent table access, we use multi-versioning. In other words, when a tab Reading from a table is automatically parallelized. -## TTL for Columns and Tables { #table_engine-mergetree-ttl} +## TTL for Columns and Tables {#table_engine-mergetree-ttl} Determines the lifetime of values. @@ -390,7 +390,7 @@ TTL date_time + INTERVAL 1 MONTH TTL date_time + INTERVAL 15 HOUR ``` -### Column TTL { #mergetree-column-ttl} +### Column TTL {#mergetree-column-ttl} When the values in the column expire, ClickHouse replaces them with the default values for the column data type. If all the column values in the data part expire, ClickHouse deletes this column from the data part in a filesystem. @@ -429,7 +429,7 @@ ALTER TABLE example_table c String TTL d + INTERVAL 1 MONTH; ``` -### Table TTL { #mergetree-table-ttl} +### Table TTL {#mergetree-table-ttl} Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria. @@ -479,7 +479,7 @@ If you perform the `SELECT` query between merges, you may get expired data. To a [Original article](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/) -## Using Multiple Block Devices for Data Storage { #table_engine-mergetree-multiple-volumes} +## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} ### Introduction @@ -496,7 +496,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be The names given to the described entities can be found in the system tables, [system.storage_policies](../system_tables.md#system_tables-storage_policies) and [system.disks](../system_tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables. -### Configuration { #table_engine-mergetree-multiple-volumes_configure} +### Configuration {#table_engine-mergetree-multiple-volumes_configure} Disks, volumes and storage policies should be declared inside the `` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory. 
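Pulling together the TTL and storage-policy features described above, a hedged sketch; the table name is hypothetical, and any policy other than `default` must be declared in the server's storage configuration first:

```sql
CREATE TABLE events_with_ttl
(
    d DateTime,
    a Int32 TTL d + INTERVAL 1 MONTH,    -- column TTL: a reverts to its default after a month
    b String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 3 MONTH                 -- table TTL: whole rows are deleted after three months
SETTINGS storage_policy = 'default';     -- or a policy from the storage configuration
```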
diff --git a/docs/en/operations/table_engines/odbc.md b/docs/en/operations/table_engines/odbc.md index 3ba3f87fe69..8bce818d5ed 100644 --- a/docs/en/operations/table_engines/odbc.md +++ b/docs/en/operations/table_engines/odbc.md @@ -1,4 +1,4 @@ -# ODBC { #table_engine-odbc} +# ODBC {#table_engine-odbc} Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). diff --git a/docs/en/operations/table_engines/replication.md b/docs/en/operations/table_engines/replication.md index a9fbb435b5e..dbf1f491015 100644 --- a/docs/en/operations/table_engines/replication.md +++ b/docs/en/operations/table_engines/replication.md @@ -1,4 +1,4 @@ -# Data Replication { #table_engines-replication} +# Data Replication {#table_engines-replication} Replication is only supported for tables in the MergeTree family: @@ -73,7 +73,7 @@ You can have any number of replicas of the same data. Yandex.Metrica uses double The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error). -## Creating Replicated Tables { #creating-replicated-tables} +## Creating Replicated Tables {#creating-replicated-tables} The `Replicated` prefix is added to the table engine name. For example:`ReplicatedMergeTree`. diff --git a/docs/en/operations/table_engines/stripelog.md b/docs/en/operations/table_engines/stripelog.md index e8c3ea02111..59c918defc1 100644 --- a/docs/en/operations/table_engines/stripelog.md +++ b/docs/en/operations/table_engines/stripelog.md @@ -4,7 +4,7 @@ This engine belongs to the family of log engines. See the common properties of l Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows). -## Creating a Table { #table_engines-stripelog-creating-a-table} +## Creating a Table {#table_engines-stripelog-creating-a-table} ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -17,7 +17,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] See the detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query. -## Writing the Data { #table_engines-stripelog-writing-the-data} +## Writing the Data {#table_engines-stripelog-writing-the-data} The `StripeLog` engine stores all the columns in one file. For each `INSERT` query, ClickHouse appends the data block to the end of a table file, writing columns one by one. @@ -28,11 +28,11 @@ For each table ClickHouse writes the files: The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations. -## Reading the Data { #table_engines-stripelog-reading-the-data} +## Reading the Data {#table_engines-stripelog-reading-the-data} The file with marks allows ClickHouse to parallelize the reading of data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows. 
-## Example of Use { #table_engines-stripelog-example-of-use} +## Example of Use {#table_engines-stripelog-example-of-use} Creating a table: diff --git a/docs/en/operations/table_engines/summingmergetree.md b/docs/en/operations/table_engines/summingmergetree.md index 2c2713f083f..bb66d1b7a22 100644 --- a/docs/en/operations/table_engines/summingmergetree.md +++ b/docs/en/operations/table_engines/summingmergetree.md @@ -86,7 +86,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key ``` -## Data Processing { #data-processing} +## Data Processing {#data-processing} When data are inserted into a table, they are saved as-is. Clickhouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data. diff --git a/docs/en/operations/table_engines/url.md b/docs/en/operations/table_engines/url.md index 9666a5cb89f..60d8b5e1f2a 100644 --- a/docs/en/operations/table_engines/url.md +++ b/docs/en/operations/table_engines/url.md @@ -1,4 +1,4 @@ -# URL(URL, Format) { #table_engines-url} +# URL(URL, Format) {#table_engines-url} Manages data on a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine. diff --git a/docs/en/operations/table_engines/versionedcollapsingmergetree.md b/docs/en/operations/table_engines/versionedcollapsingmergetree.md index 097ffb36cd4..4e7747ffae1 100644 --- a/docs/en/operations/table_engines/versionedcollapsingmergetree.md +++ b/docs/en/operations/table_engines/versionedcollapsingmergetree.md @@ -71,7 +71,7 @@ All of the parameters except `sign` and `version` have the same meaning as in `M -## Collapsing { #table_engines_versionedcollapsingmergetree} +## Collapsing {#table_engines_versionedcollapsingmergetree} ### Data @@ -119,7 +119,7 @@ To find out why we need two rows for each change, see [Algorithm](#table_engines 2. Long growing arrays in columns reduce the efficiency of the engine due to the load for writing. The more straightforward the data, the better the efficiency. 3. `SELECT` results depend strongly on the consistency of the history of object changes. Be accurate when preparing data for inserting. You can get unpredictable results with inconsistent data, such as negative values for non-negative metrics like session depth. -### Algorithm { #table_engines-versionedcollapsingmergetree-algorithm} +### Algorithm {#table_engines-versionedcollapsingmergetree-algorithm} When ClickHouse merges data parts, it deletes each pair of rows that have the same primary key and version and different `Sign`. The order of rows does not matter. diff --git a/docs/en/operations/table_engines/view.md b/docs/en/operations/table_engines/view.md index ef0c93446cf..8c2e1295156 100644 --- a/docs/en/operations/table_engines/view.md +++ b/docs/en/operations/table_engines/view.md @@ -1,4 +1,4 @@ -# View { #table_engines-view} +# View {#table_engines-view} Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query). 
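For reference, a minimal sketch of creating and reading a view; the table and view names are hypothetical.

```sql
-- The view stores only the SELECT query; reading from it re-runs that query.
CREATE TABLE visits_src (CounterID UInt32, Duration UInt32) ENGINE = Memory;

CREATE VIEW visits_per_counter AS
    SELECT CounterID, count() AS visits
    FROM visits_src
    GROUP BY CounterID;

-- Executed against visits_src at read time.
SELECT * FROM visits_per_counter;
```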
diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index 72b23c92b3e..656a12bad2a 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -5,7 +5,7 @@ - [Query processing](#troubleshooting-does-not-process-queries) - [Efficiency of query processing](#troubleshooting-too-slow) -## Installation { #troubleshooting-installation-errors} +## Installation {#troubleshooting-installation-errors} ### You Cannot Get Deb Packages from ClickHouse Repository With apt-get @@ -13,7 +13,7 @@ - If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package. -## Connecting to the Server { #troubleshooting-accepts-no-connections} +## Connecting to the Server {#troubleshooting-accepts-no-connections} Possible issues: @@ -120,7 +120,7 @@ Check: You might be using the wrong user name or password. -## Query Processing { #troubleshooting-does-not-process-queries} +## Query Processing {#troubleshooting-does-not-process-queries} If ClickHouse is not able to process the query, it sends an error description to the client. In the `clickhouse-client` you get a description of the error in the console. If you are using the HTTP interface, ClickHouse sends the error description in the response body. For example: @@ -133,7 +133,7 @@ If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse re You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors. -## Efficiency of Query Processing { #troubleshooting-too-slow} +## Efficiency of Query Processing {#troubleshooting-too-slow} If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries. diff --git a/docs/en/operations/utils/clickhouse-benchmark.md b/docs/en/operations/utils/clickhouse-benchmark.md index 7d7c1983d20..5707158e671 100644 --- a/docs/en/operations/utils/clickhouse-benchmark.md +++ b/docs/en/operations/utils/clickhouse-benchmark.md @@ -25,7 +25,7 @@ Then pass this file to a standard input of `clickhouse-benchmark`. clickhouse-benchmark [keys] < queries_file ``` -## Keys { #clickhouse-benchmark-keys} +## Keys {#clickhouse-benchmark-keys} - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. @@ -47,7 +47,7 @@ clickhouse-benchmark [keys] < queries_file If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. -## Output { #clickhouse-benchmark-output} +## Output {#clickhouse-benchmark-output} By default, `clickhouse-benchmark` reports for each `--delay` interval. @@ -90,13 +90,13 @@ In the report you can find: - Percentiles of queries execution time. -## Comparison mode { #clickhouse-benchmark-comparison-mode} +## Comparison mode {#clickhouse-benchmark-comparison-mode} `clickhouse-benchmark` can compare performances for two running ClickHouse servers. To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. 
Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately. -## Example { #clickhouse-benchmark-example} +## Example {#clickhouse-benchmark-example} ```bash $ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 diff --git a/docs/en/query_language/agg_functions/combinators.md b/docs/en/query_language/agg_functions/combinators.md index 0174dd5c163..9cb6c4be4fa 100644 --- a/docs/en/query_language/agg_functions/combinators.md +++ b/docs/en/query_language/agg_functions/combinators.md @@ -1,8 +1,8 @@ -# Aggregate function combinators { #aggregate_functions_combinators} +# Aggregate function combinators {#aggregate_functions_combinators} The name of an aggregate function can have a suffix appended to it. This changes the way the aggregate function works. -## -If { #agg-functions-combinator-if} +## -If {#agg-functions-combinator-if} The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument – a condition (Uint8 type). The aggregate function processes only the rows that trigger the condition. If the condition was not triggered even once, it returns a default value (usually zeros or empty strings). @@ -10,7 +10,7 @@ Examples: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTi With conditional aggregate functions, you can calculate aggregates for several conditions at once, without using subqueries and `JOIN`s. For example, in Yandex.Metrica, conditional aggregate functions are used to implement the segment comparison functionality. -## -Array { #agg-functions-combinator-array} +## -Array {#agg-functions-combinator-array} The -Array suffix can be appended to any aggregate function. In this case, the aggregate function takes arguments of the 'Array(T)' type (arrays) instead of 'T' type arguments. If the aggregate function accepts multiple arguments, this must be arrays of equal lengths. When processing arrays, the aggregate function works like the original aggregate function across all array elements. @@ -20,7 +20,7 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all 'arr -If and -Array can be combined. However, 'Array' must come first, then 'If'. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the 'cond' argument won't be an array. -## -State { #agg-functions-combinator-state} +## -State {#agg-functions-combinator-state} If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. @@ -32,19 +32,19 @@ To work with these states, use: - [-Merge](#aggregate_functions_combinators_merge) combinator. - [-MergeState](#aggregate_functions_combinators_mergestate) combinator. 
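To make the `-State`/`-Merge` workflow concrete, here is a minimal sketch with hypothetical table names: intermediate `uniq` states are stored in an `AggregatingMergeTree` table and finished later with `uniqMerge`.

```sql
CREATE TABLE visits_raw (CounterID UInt32, UserID UInt64) ENGINE = Memory;
INSERT INTO visits_raw VALUES (1, 10), (1, 10), (1, 20), (2, 30);

-- Column type matches the stored intermediate state.
CREATE TABLE visits_agg
(
    CounterID UInt32,
    Users AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY CounterID;

-- -State produces the intermediate state instead of the final value.
INSERT INTO visits_agg
SELECT CounterID, uniqState(UserID)
FROM visits_raw
GROUP BY CounterID;

-- -Merge finishes the aggregation: 2 users for counter 1, 1 user for counter 2.
SELECT CounterID, uniqMerge(Users) AS users
FROM visits_agg
GROUP BY CounterID
ORDER BY CounterID;
```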
-## -Merge { #aggregate_functions_combinators_merge} +## -Merge {#aggregate_functions_combinators_merge} If you apply this combinator, the aggregate function takes the intermediate aggregation state as an argument, combines the states to finish aggregation, and returns the resulting value. -## -MergeState { #aggregate_functions_combinators_mergestate} +## -MergeState {#aggregate_functions_combinators_mergestate} Merges the intermediate aggregation states in the same way as the -Merge combinator. However, it doesn't return the resulting value, but an intermediate aggregation state, similar to the -State combinator. -## -ForEach { #agg-functions-combinator-foreach} +## -ForEach {#agg-functions-combinator-foreach} Converts an aggregate function for tables into an aggregate function for arrays that aggregates the corresponding array items and returns an array of results. For example, `sumForEach` for the arrays `[1, 2]`, `[3, 4, 5]`and`[6, 7]`returns the result `[10, 13, 5]` after adding together the corresponding array items. -## -OrDefault { #agg-functions-combinator-ordefault} +## -OrDefault {#agg-functions-combinator-ordefault} Fills the default value of the aggregate function's return type if there is nothing to aggregate. @@ -57,7 +57,7 @@ SELECT avg(number), avgOrDefault(number) FROM numbers(0) └─────────────┴──────────────────────┘ ``` -## -OrNull { #agg-functions-combinator-ornull} +## -OrNull {#agg-functions-combinator-ornull} Fills `null` if there is nothing to aggregate. The return column will be nullable. @@ -85,7 +85,7 @@ FROM └────────────────────────────────┘ ``` -## -Resample { #agg_functions-combinator-resample} +## -Resample {#agg_functions-combinator-resample} Lets you divide data into groups, and then separately aggregates the data in those groups. Groups are created by splitting the values from one column into intervals. diff --git a/docs/en/query_language/agg_functions/index.md b/docs/en/query_language/agg_functions/index.md index dec14812f04..613518611f6 100644 --- a/docs/en/query_language/agg_functions/index.md +++ b/docs/en/query_language/agg_functions/index.md @@ -1,4 +1,4 @@ -# Aggregate functions { #aggregate_functions} +# Aggregate functions {#aggregate_functions} Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts. diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md index 7da0bdca8b8..9a7a356d7c4 100644 --- a/docs/en/query_language/agg_functions/parametric_functions.md +++ b/docs/en/query_language/agg_functions/parametric_functions.md @@ -1,4 +1,4 @@ -# Parametric aggregate functions { #aggregate_functions_parametric} +# Parametric aggregate functions {#aggregate_functions_parametric} Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters – constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. @@ -71,7 +71,7 @@ FROM In this case, you should remember that you don't know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) { #function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} Checks whether the sequence contains an event chain that matches the pattern. 
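A minimal, self-contained sketch of `sequenceMatch`; the table name `t` and the sample data are hypothetical.

```sql
CREATE TABLE t (time UInt32, number UInt8) ENGINE = Memory;
INSERT INTO t VALUES (1, 1), (2, 3), (3, 2);

-- Returns 1: an event with number = 1 is followed, not necessarily immediately,
-- by an event with number = 2.
SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) AS matched FROM t;
```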
@@ -161,7 +161,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, ...) { #function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} Counts the number of event chains that matched the pattern. The function searches event chains that don't overlap. It starts to search for the next chain after the current chain is matched. @@ -219,7 +219,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t - [sequenceMatch](#function-sequencematch) -## windowFunnel { #windowfunnel} +## windowFunnel {#windowfunnel} Searches for event chains in a sliding time window and calculates the maximum number of events that occurred from the chain. @@ -309,7 +309,7 @@ Result: └───────┴───┘ ``` -## retention { #retention} +## retention {#retention} The function takes as arguments a set of conditions from 1 to 32 arguments of type `UInt8` that indicate whether a certain condition was met for the event. Any condition can be specified as an argument (as in [WHERE](../../query_language/select.md#select-where)). diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index eeeead8e69e..7c099c26580 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -1,6 +1,6 @@ # Function Reference -## count { #agg_function-count} +## count {#agg_function-count} Counts the number of rows or not-NULL values. @@ -65,7 +65,7 @@ SELECT count(DISTINCT num) FROM t This example shows that `count(DISTINCT num)` is performed by the `uniqExact` function according to the `count_distinct_implementation` setting value. -## any(x) { #agg_function-any} +## any(x) {#agg_function-any} Selects the first encountered value. The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. @@ -277,15 +277,15 @@ num 3 ``` -## min(x) { #agg_function-min} +## min(x) {#agg_function-min} Calculates the minimum. -## max(x) { #agg_function-max} +## max(x) {#agg_function-max} Calculates the maximum. -## argMin(arg, val) { #agg_function-argMin} +## argMin(arg, val) {#agg_function-argMin} Calculates the 'arg' value for a minimal 'val' value. If there are several different values of 'arg' for minimal values of 'val', the first of these values encountered is output. @@ -306,12 +306,12 @@ SELECT argMin(user, salary) FROM salary └──────────────────────┘ ``` -## argMax(arg, val) { #agg_function-argMax} +## argMax(arg, val) {#agg_function-argMax} Calculates the 'arg' value for a maximum 'val' value. If there are several different values of 'arg' for maximum values of 'val', the first of these values encountered is output. -## sum(x) { #agg_function-sum} +## sum(x) {#agg_function-sum} Calculates the sum. Only works for numbers. @@ -323,7 +323,7 @@ Computes the sum of the numbers, using the same data type for the result as for Only works for numbers. -## sumMap(key, value) { #agg_functions-summap} +## sumMap(key, value) {#agg_functions-summap} Totals the 'value' array according to the keys specified in the 'key' array. The number of elements in 'key' and 'value' must be the same for each row that is totaled. @@ -451,7 +451,7 @@ The kurtosis of the given distribution. 
Type — [Float64](../../data_types/floa SELECT kurtSamp(value) FROM series_with_value_column ``` -## timeSeriesGroupSum(uid, timestamp, value) { #agg_function-timeseriesgroupsum} +## timeSeriesGroupSum(uid, timestamp, value) {#agg_function-timeseriesgroupsum} `timeSeriesGroupSum` can aggregate different time series that sample timestamp not alignment. It will use linear interpolation between two sample timestamp and then sum time-series together. @@ -498,7 +498,7 @@ And the result will be: [(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] ``` -## timeSeriesGroupRateSum(uid, ts, val) { #agg_function-timeseriesgroupratesum} +## timeSeriesGroupRateSum(uid, ts, val) {#agg_function-timeseriesgroupratesum} Similarly timeSeriesGroupRateSum, timeSeriesGroupRateSum will Calculate the rate of time-series and then sum rates together. Also, timestamp should be in ascend order before use this function. @@ -507,13 +507,13 @@ Use this function, the result above case will be: [(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] ``` -## avg(x) { #agg_function-avg} +## avg(x) {#agg_function-avg} Calculates the average. Only works for numbers. The result is always Float64. -## uniq { #agg_function-uniq} +## uniq {#agg_function-uniq} Calculates the approximate number of different values of the argument. @@ -549,7 +549,7 @@ We recommend using this function in almost all scenarios. - [uniqHLL12](#agg_function-uniqhll12) - [uniqExact](#agg_function-uniqexact) -## uniqCombined { #agg_function-uniqcombined} +## uniqCombined {#agg_function-uniqcombined} Calculates the approximate number of different argument values. @@ -596,11 +596,11 @@ Compared to the [uniq](#agg_function-uniq) function, the `uniqCombined`: - [uniqHLL12](#agg_function-uniqhll12) - [uniqExact](#agg_function-uniqexact) -## uniqCombined64 { #agg_function-uniqcombined64} +## uniqCombined64 {#agg_function-uniqcombined64} Same as [uniqCombined](#agg_function-uniqcombined), but uses 64-bit hash for all data types. -## uniqHLL12 { #agg_function-uniqhll12} +## uniqHLL12 {#agg_function-uniqhll12} Calculates the approximate number of different argument values, using the [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) algorithm. @@ -636,7 +636,7 @@ We don't recommend using this function. In most cases, use the [uniq](#agg_funct - [uniqExact](#agg_function-uniqexact) -## uniqExact { #agg_function-uniqexact} +## uniqExact {#agg_function-uniqexact} Calculates the exact number of different argument values. @@ -659,7 +659,7 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` - [uniqHLL12](#agg_function-uniqhll12) -## groupArray(x), groupArray(max_size)(x) { #agg_function-grouparray} +## groupArray(x), groupArray(max_size)(x) {#agg_function-grouparray} Creates an array of argument values. Values can be added to the array in any (indeterminate) order. @@ -684,7 +684,7 @@ Optional parameters: - The default value for substituting in empty positions. - The length of the resulting array. This allows you to receive arrays of the same size for all the aggregate keys. When using this parameter, the default value must be specified. -## groupArrayMovingSum { #agg_function-grouparraymovingsum} +## groupArrayMovingSum {#agg_function-grouparraymovingsum} Calculates the moving sum of input values. 
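A small sketch of `groupArrayMovingSum`, using the `numbers` table function; the window size in the second query is an arbitrary illustration.

```sql
-- Cumulative sum when no window size is given (rows 0..4 arrive in order here).
SELECT groupArrayMovingSum(number) AS running_sum FROM numbers(5);
-- typically [0, 1, 3, 6, 10]

-- Moving sum limited to a window of the last 2 values.
SELECT groupArrayMovingSum(2)(number) AS windowed_sum FROM numbers(5);
-- typically [0, 1, 3, 5, 7]
```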
@@ -753,7 +753,7 @@ FROM t └────────────┴─────────────────────────────────┴────────────────────────┘ ``` -## groupArrayMovingAvg { #agg_function-grouparraymovingavg} +## groupArrayMovingAvg {#agg_function-grouparraymovingavg} Calculates the moving average of input values. @@ -831,7 +831,7 @@ Creates an array from different argument values. Memory consumption is the same The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`. -## quantile { #quantile} +## quantile {#quantile} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -896,7 +896,7 @@ Result: - [quantiles](#quantiles) -## quantileDeterministic { #quantiledeterministic} +## quantileDeterministic {#quantiledeterministic} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -962,7 +962,7 @@ Result: - [quantiles](#quantiles) -## quantileExact { #quantileexact} +## quantileExact {#quantileexact} Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -1014,7 +1014,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileExactWeighted { #quantileexactweighted} +## quantileExactWeighted {#quantileexactweighted} Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element. @@ -1078,7 +1078,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileTiming { #quantiletiming} +## quantileTiming {#quantiletiming} With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. @@ -1160,7 +1160,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileTimingWeighted { #quantiletimingweighted} +## quantileTimingWeighted {#quantiletimingweighted} With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence according to the weight of each sequence member. @@ -1243,7 +1243,7 @@ Result: - [quantiles](#quantiles) -## quantileTDigest { #quantiletdigest} +## quantileTDigest {#quantiletdigest} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. @@ -1298,7 +1298,7 @@ Result: - [median](#median) - [quantiles](#quantiles) -## quantileTDigestWeighted { #quantiletdigestweighted} +## quantileTDigestWeighted {#quantiletdigestweighted} Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. @@ -1354,7 +1354,7 @@ Result: - [quantiles](#quantiles) -## median { #median} +## median {#median} The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample. 
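A short sketch showing that `median` is just the 0.5-level quantile, with the exact variant for comparison:

```sql
-- median(x) and quantile(0.5)(x) are the same approximate function;
-- quantileExact computes the value precisely.
SELECT
    median(number)             AS med,
    quantile(0.5)(number)      AS q50,
    quantileExact(0.5)(number) AS q50_exact
FROM numbers(11);
-- all three should return 5 for the sequence 0..10
```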
@@ -1397,7 +1397,7 @@ Result: ``` -## quantiles(level1, level2, ...)(x) { #quantiles} +## quantiles(level1, level2, ...)(x) {#quantiles} All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. @@ -1462,7 +1462,7 @@ FROM ontime └─────────────────────┘ ``` -## topKWeighted { #topkweighted} +## topKWeighted {#topkweighted} Similar to `topK` but takes one additional argument of integer type - `weight`. Every value is accounted `weight` times for frequency calculation. @@ -1562,12 +1562,12 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6]) └───────────────────────────────────────────────────────────────────┘ ``` -## stochasticLinearRegression { #agg_functions-stochasticlinearregression} +## stochasticLinearRegression {#agg_functions-stochasticlinearregression} This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size and has few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). -### Parameters { #agg_functions-stochasticlinearregression-parameters} +### Parameters {#agg_functions-stochasticlinearregression-parameters} There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four - default values will be used, however good model required some parameter tuning. @@ -1581,7 +1581,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD') 4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods. -### Usage { #agg_functions-stochasticlinearregression-usage} +### Usage {#agg_functions-stochasticlinearregression-usage} `stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage we use `-State` combinator, which basically saves the state (model weights, etc). To predict we use function [evalMLMethod](../functions/machine_learning_functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on. @@ -1622,7 +1622,7 @@ The query will return a column of predicted values. Note that first argument of `test_data` is a table like `train_data` but may not contain target value. -### Notes { #agg_functions-stochasticlinearregression-notes} +### Notes {#agg_functions-stochasticlinearregression-notes} 1. To merge two models user may create such query: ```sql @@ -1642,12 +1642,12 @@ The query will return a column of predicted values. 
Note that first argument of - [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) -## stochasticLogisticRegression { #agg_functions-stochasticlogisticregression} +## stochasticLogisticRegression {#agg_functions-stochasticlogisticregression} This function implements stochastic logistic regression. It can be used for binary classification problem, supports the same custom parameters as stochasticLinearRegression and works the same way. -### Parameters { #agg_functions-stochasticlogisticregression-parameters} +### Parameters {#agg_functions-stochasticlogisticregression-parameters} Parameters are exactly the same as in stochasticLinearRegression: `learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 4bb40df4c3d..eeb11282f65 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -1,4 +1,4 @@ -## ALTER { #query_language_queries_alter} +## ALTER {#query_language_queries_alter} The `ALTER` query is only supported for `*MergeTree` tables, as well as `Merge`and`Distributed`. The query has several variations. @@ -23,7 +23,7 @@ The following actions are supported: These actions are described in detail below. -#### ADD COLUMN { #alter_add-column} +#### ADD COLUMN {#alter_add-column} ```sql ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] @@ -43,7 +43,7 @@ Example: ALTER TABLE visits ADD COLUMN browser String AFTER user_id ``` -#### DROP COLUMN { #alter_drop-column} +#### DROP COLUMN {#alter_drop-column} ```sql DROP COLUMN [IF EXISTS] name @@ -59,7 +59,7 @@ Example: ALTER TABLE visits DROP COLUMN browser ``` -#### CLEAR COLUMN { #alter_clear-column} +#### CLEAR COLUMN {#alter_clear-column} ```sql CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name @@ -75,7 +75,7 @@ Example: ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple() ``` -#### COMMENT COLUMN { #alter_comment-column} +#### COMMENT COLUMN {#alter_comment-column} ```sql COMMENT COLUMN [IF EXISTS] name 'comment' @@ -93,7 +93,7 @@ Example: ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' ``` -#### MODIFY COLUMN { #alter_modify-column} +#### MODIFY COLUMN {#alter_modify-column} ```sql MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] @@ -191,7 +191,7 @@ Constraint check *will not be executed* on existing data if it was added. All changes on replicated tables are broadcasting to ZooKeeper so will be applied on other replicas. -### Manipulations With Partitions and Parts { #alter_manipulations-with-partitions} +### Manipulations With Partitions and Parts {#alter_manipulations-with-partitions} The following operations with [partitions](../operations/table_engines/custom_partitioning_key.md) are available: @@ -207,7 +207,7 @@ The following operations with [partitions](../operations/table_engines/custom_pa - [FREEZE PARTITION](#alter_freeze-partition) – Creates a backup of a partition. - [FETCH PARTITION](#alter_fetch-partition) – Downloads a partition from another server. - [MOVE PARTITION|PART](#alter_move-partition) – Move partition/data part to another disk or volume. 
-#### DETACH PARTITION { #alter_detach-partition} +#### DETACH PARTITION {#alter_detach-partition} ```sql ALTER TABLE table_name DETACH PARTITION partition_expr @@ -227,7 +227,7 @@ After the query is executed, you can do whatever you want with the data in the ` This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../operations/system_tables.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica. -#### DROP PARTITION { #alter_drop-partition} +#### DROP PARTITION {#alter_drop-partition} ```sql ALTER TABLE table_name DROP PARTITION partition_expr @@ -239,7 +239,7 @@ Read about setting the partition expression in a section [How to specify the par The query is replicated – it deletes data on all replicas. -#### DROP DETACHED PARTITION|PART { #alter_drop-detached} +#### DROP DETACHED PARTITION|PART {#alter_drop-detached} ```sql ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr @@ -249,7 +249,7 @@ Removes the specified part or all parts of the specified partition from `detache Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). -#### ATTACH PARTITION|PART { #alter_attach-partition} +#### ATTACH PARTITION|PART {#alter_attach-partition} ``` sql ALTER TABLE table_name ATTACH PARTITION|PART partition_expr @@ -268,7 +268,7 @@ This query is replicated. The replica-initiator checks whether there is data in So you can put data to the `detached` directory on one replica, and use the `ALTER ... ATTACH` query to add it to the table on all replicas. -#### ATTACH PARTITION FROM { #alter_attach-partition-from} +#### ATTACH PARTITION FROM {#alter_attach-partition-from} ```sql ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1 @@ -281,7 +281,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key. -#### REPLACE PARTITION { #alter_replace-partition} +#### REPLACE PARTITION {#alter_replace-partition} ```sql ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1 @@ -294,7 +294,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key. -#### MOVE PARTITION TO TABLE { #alter_move_to_table-partition} +#### MOVE PARTITION TO TABLE {#alter_move_to_table-partition} ``` sql ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest @@ -311,7 +311,7 @@ For the query to run successfully, the following conditions must be met: -#### CLEAR COLUMN IN PARTITION { #alter_clear-column-partition} +#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition} ```sql ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr @@ -325,7 +325,7 @@ Example: ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902 ``` -#### FREEZE PARTITION { #alter_freeze-partition} +#### FREEZE PARTITION {#alter_freeze-partition} ```sql ALTER TABLE table_name FREEZE [PARTITION partition_expr] @@ -364,7 +364,7 @@ Restoring from a backup doesn't require stopping the server. For more information about backups and restoring data, see the [Data Backup](../operations/backup.md) section. 
-#### CLEAR INDEX IN PARTITION { #alter_clear-index-partition} +#### CLEAR INDEX IN PARTITION {#alter_clear-index-partition} ```sql ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr @@ -372,7 +372,7 @@ ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr The query works similar to `CLEAR COLUMN`, but it resets an index instead of a column data. -#### FETCH PARTITION { #alter_fetch-partition} +#### FETCH PARTITION {#alter_fetch-partition} ```sql ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper' @@ -400,7 +400,7 @@ Before downloading, the system checks if the partition exists and the table stru Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table. -#### MOVE PARTITION|PART { #alter_move-partition} +#### MOVE PARTITION|PART {#alter_move-partition} Moves partitions or data parts to another volume or disk for `MergeTree`-engine tables. See [Using Multiple Block Devices for Data Storage](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes). @@ -421,7 +421,7 @@ ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow' ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' ``` -#### How To Set Partition Expression { #alter-how-to-specify-part-expr} +#### How To Set Partition Expression {#alter-how-to-specify-part-expr} You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: @@ -458,7 +458,7 @@ For non-replicatable tables, all `ALTER` queries are performed synchronously. Fo For `ALTER ... ATTACH|DETACH|DROP` queries, you can use the `replication_alter_partitions_sync` setting to set up waiting. Possible values: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all. -### Mutations { #alter-mutations} +### Mutations {#alter-mutations} Mutations are an ALTER query variant that allows changing or deleting rows in a table. In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. Supported for the `MergeTree` family of table engines including the engines with replication support. diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md index f29fd7fe8ae..f399eb27a4f 100644 --- a/docs/en/query_language/create.md +++ b/docs/en/query_language/create.md @@ -1,6 +1,6 @@ # CREATE Queries -## CREATE DATABASE { #query_language-create-database} +## CREATE DATABASE {#query_language-create-database} Creates database. @@ -29,7 +29,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(.. By default, ClickHouse uses its own [database engine](../database_engines/index.md). -## CREATE TABLE { #create-table-query} +## CREATE TABLE {#create-table-query} The `CREATE TABLE` query can have several forms. @@ -70,7 +70,7 @@ In all cases, if `IF NOT EXISTS` is specified, the query won't return an error i There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../operations/table_engines/index.md#table_engines). -### Default Values { #create-default-values} +### Default Values {#create-default-values} The column description can specify an expression for a default value, in one of the following ways:`DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. 
Example: `URLDomain String DEFAULT domain(URL)`. @@ -105,7 +105,7 @@ If you add a new column to a table but later change its default expression, the It is not possible to set default values for elements in nested data structures. -### Constraints { #constraints} +### Constraints {#constraints} Along with columns descriptions constraints could be defined: @@ -127,7 +127,7 @@ Adding large amount of constraints can negatively affect performance of big `INS Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl). -### Column Compression Codecs { #codecs} +### Column Compression Codecs {#codecs} By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../operations/server_settings/settings.md#server-settings-compression) section of a server configuration. You can also define the compression method for each individual column in the `CREATE TABLE` query. @@ -158,7 +158,7 @@ Compression is supported for the following table engines: ClickHouse supports common purpose codecs and specialized codecs. -#### Specialized Codecs { #create-query-specialized-codecs} +#### Specialized Codecs {#create-query-specialized-codecs} These codecs are designed to make compression more effective by using specific features of data. Some of these codecs don't compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. @@ -180,7 +180,7 @@ CREATE TABLE codec_example ENGINE = MergeTree() ``` -#### Common purpose codecs { #create-query-common-purpose-codecs} +#### Common purpose codecs {#create-query-common-purpose-codecs} Codecs: @@ -275,7 +275,7 @@ Views look the same as normal tables. For example, they are listed in the result There isn't a separate query for deleting views. To delete a view, use `DROP TABLE`. -## CREATE DICTIONARY { #create-dictionary-query} +## CREATE DICTIONARY {#create-dictionary-query} ```sql CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name diff --git a/docs/en/query_language/dicts/external_dicts.md b/docs/en/query_language/dicts/external_dicts.md index 2c6f89be9e5..46733654f68 100644 --- a/docs/en/query_language/dicts/external_dicts.md +++ b/docs/en/query_language/dicts/external_dicts.md @@ -1,4 +1,4 @@ -# External Dictionaries { #dicts-external_dicts} +# External Dictionaries {#dicts-external_dicts} You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see "[Sources for external dictionaries](external_dicts_dict_sources.md)". @@ -37,7 +37,7 @@ You can [configure](external_dicts_dict.md) any number of dictionaries in the sa !!! attention "Attention" You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../functions/other_functions.md) function). This functionality is not related to external dictionaries. 
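As a sketch of the `transform` approach mentioned in the note above (the codes and names are hypothetical):

```sql
-- A tiny inline mapping, no external dictionary required.
SELECT
    code,
    transform(code, ['gb', 'us'], ['United Kingdom', 'United States'], 'Other') AS country
FROM (SELECT arrayJoin(['gb', 'us', 'fr']) AS code);
```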
-## See also { #ext-dicts-see-also} +## See also {#ext-dicts-see-also} - [Configuring an External Dictionary](external_dicts_dict.md) - [Storing Dictionaries in Memory](external_dicts_dict_layout.md) diff --git a/docs/en/query_language/dicts/external_dicts_dict.md b/docs/en/query_language/dicts/external_dicts_dict.md index 02b33d5c353..fba76199784 100644 --- a/docs/en/query_language/dicts/external_dicts_dict.md +++ b/docs/en/query_language/dicts/external_dicts_dict.md @@ -1,4 +1,4 @@ -# Configuring an External Dictionary { #dicts-external_dicts_dict} +# Configuring an External Dictionary {#dicts-external_dicts_dict} If dictionary is configured using xml file, than dictionary configuration has the following structure: diff --git a/docs/en/query_language/dicts/external_dicts_dict_layout.md b/docs/en/query_language/dicts/external_dicts_dict_layout.md index fbb978b1859..87034a5aa14 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_layout.md +++ b/docs/en/query_language/dicts/external_dicts_dict_layout.md @@ -1,4 +1,4 @@ -# Storing Dictionaries in Memory { #dicts-external_dicts_dict_layout} +# Storing Dictionaries in Memory {#dicts-external_dicts_dict_layout} There are a variety of ways to store dictionaries in memory. @@ -79,7 +79,7 @@ or LAYOUT(FLAT()) ``` -### hashed { #dicts-external_dicts_dict_layout-hashed} +### hashed {#dicts-external_dicts_dict_layout-hashed} The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items. @@ -99,7 +99,7 @@ or LAYOUT(HASHED()) ``` -### sparse_hashed { #dicts-external_dicts_dict_layout-sparse_hashed} +### sparse_hashed {#dicts-external_dicts_dict_layout-sparse_hashed} Similar to `hashed`, but uses less memory in favor more CPU usage. diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md index 091452f7484..b52b07b4256 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md @@ -1,4 +1,4 @@ -# Sources of External Dictionaries { #dicts-external_dicts_dict_sources} +# Sources of External Dictionaries {#dicts-external_dicts_dict_sources} An external dictionary can be connected from many different sources. @@ -43,7 +43,7 @@ Types of sources (`source_type`): - [Redis](#dicts-external_dicts_dict_sources-redis) -## Local File { #dicts-external_dicts_dict_sources-local_file} +## Local File {#dicts-external_dicts_dict_sources-local_file} Example of settings: @@ -68,7 +68,7 @@ Setting fields: - `format` – The file format. All the formats described in "[Formats](../../interfaces/formats.md#formats)" are supported. -## Executable File { #dicts-external_dicts_dict_sources-executable} +## Executable File {#dicts-external_dicts_dict_sources-executable} Working with executable files depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data. @@ -95,7 +95,7 @@ Setting fields: - `format` – The file format. All the formats described in "[Formats](../../interfaces/formats.md#formats)" are supported. 
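A hedged sketch of the DDL equivalent for an executable-file source; the dictionary name, structure, and command are hypothetical, and the layout and lifetime values are arbitrary.

```sql
-- Hypothetical dictionary whose data comes from an executable command's output.
CREATE DICTIONARY os_dict
(
    id UInt64,
    os_name String
)
PRIMARY KEY id
SOURCE(EXECUTABLE(command 'cat /opt/dictionaries/os.tsv' format 'TabSeparated'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360);
```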
-## HTTP(s) { #dicts-external_dicts_dict_sources-http} +## HTTP(s) {#dicts-external_dicts_dict_sources-http} Working with an HTTP(s) server depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. @@ -146,7 +146,7 @@ Setting fields: - `value` – Value set for a specific identifiant name. -## ODBC { #dicts-external_dicts_dict_sources-odbc} +## ODBC {#dicts-external_dicts_dict_sources-odbc} You can use this method to connect any database that has an ODBC driver. @@ -390,7 +390,7 @@ LIFETIME(MIN 300 MAX 360) ## DBMS -### MySQL { #dicts-external_dicts_dict_sources-mysql} +### MySQL {#dicts-external_dicts_dict_sources-mysql} Example of settings: @@ -482,7 +482,7 @@ SOURCE(MYSQL( ``` -### ClickHouse { #dicts-external_dicts_dict_sources-clickhouse} +### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse} Example of settings: @@ -526,7 +526,7 @@ Setting fields: - `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external_dicts_dict_lifetime.md). -### MongoDB { #dicts-external_dicts_dict_sources-mongodb} +### MongoDB {#dicts-external_dicts_dict_sources-mongodb} Example of settings: @@ -566,7 +566,7 @@ Setting fields: - `collection` – Name of the collection. -### Redis { #dicts-external_dicts_dict_sources-redis} +### Redis {#dicts-external_dicts_dict_sources-redis} Example of settings: diff --git a/docs/en/query_language/dicts/external_dicts_dict_structure.md b/docs/en/query_language/dicts/external_dicts_dict_structure.md index 925affd4a70..f5a0b0b6017 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_structure.md +++ b/docs/en/query_language/dicts/external_dicts_dict_structure.md @@ -43,7 +43,7 @@ Attributes are described in the query body: - `AttrName AttrType` — [Data column](external_dicts_dict_structure.md#ext_dict_structure-attributes). There can be a multiple number of attributes. -## Key { #ext_dict_structure-key} +## Key {#ext_dict_structure-key} ClickHouse supports the following types of keys: @@ -56,7 +56,7 @@ An xml structure can contain either `` or ``. DDL-query must contain si You must not describe key as an attribute. -### Numeric Key { #ext_dict-numeric-key} +### Numeric Key {#ext_dict-numeric-key} Type: `UInt64`. @@ -126,7 +126,7 @@ PRIMARY KEY field1, field2 For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`. -## Attributes { #ext_dict_structure-attributes} +## Attributes {#ext_dict_structure-attributes} Configuration example: diff --git a/docs/en/query_language/dicts/internal_dicts.md b/docs/en/query_language/dicts/internal_dicts.md index d0a97ca5922..7df915dd998 100644 --- a/docs/en/query_language/dicts/internal_dicts.md +++ b/docs/en/query_language/dicts/internal_dicts.md @@ -1,4 +1,4 @@ -# Internal dictionaries { #internal_dicts} +# Internal dictionaries {#internal_dicts} ClickHouse contains a built-in feature for working with a geobase. 
diff --git a/docs/en/query_language/functions/arithmetic_functions.md b/docs/en/query_language/functions/arithmetic_functions.md index 6de44cfb75f..55b62b404ca 100644 --- a/docs/en/query_language/functions/arithmetic_functions.md +++ b/docs/en/query_language/functions/arithmetic_functions.md @@ -63,7 +63,7 @@ Differs from 'modulo' in that it returns zero when the divisor is zero. Calculates a number with the reverse sign. The result is always signed. -## abs(a) { #arithm_func-abs} +## abs(a) {#arithm_func-abs} Calculates the absolute value of the number (a). That is, if a < 0, it returns -a. For unsigned types it doesn't do anything. For signed integer types, it returns an unsigned number. diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 36849cdc455..6c33b018c25 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -1,18 +1,18 @@ # Functions for working with arrays -## empty { #function-empty} +## empty {#function-empty} Returns 1 for an empty array, or 0 for a non-empty array. The result type is UInt8. The function also works for strings. -## notEmpty { #function-notempty} +## notEmpty {#function-notempty} Returns 0 for an empty array, or 1 for a non-empty array. The result type is UInt8. The function also works for strings. -## length { #array_functions-length} +## length {#array_functions-length} Returns the number of items in the array. The result type is UInt64. @@ -202,7 +202,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) └──────────────────────────────────────┘ ``` -## arrayEnumerate(arr) { #array_functions-arrayenumerate} +## arrayEnumerate(arr) {#array_functions-arrayenumerate} Returns the array \[1, 2, 3, ..., length (arr) \] @@ -455,7 +455,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res Array elements set to `NULL` are handled as normal values. -## arraySort(\[func,\] arr, ...) { #array_functions-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -548,7 +548,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; !!! note To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. -## arrayReverseSort([func,] arr, ...) { #array_functions-reverse-sort} +## arrayReverseSort([func,] arr, ...) {#array_functions-reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -648,11 +648,11 @@ If multiple arguments are passed, it counts the number of different tuples of el If you want to get a list of unique items in an array, you can use arrayReduce('groupUniqArray', arr). 
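A one-line sketch of the `arrayReduce('groupUniqArray', arr)` approach; `arraySort` is added only to make the otherwise indeterminate order reproducible.

```sql
-- Distinct elements of an array, sorted for a stable result.
SELECT arraySort(arrayReduce('groupUniqArray', [3, 1, 2, 2, 3])) AS res;
-- [1, 2, 3]
```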
-## arrayJoin(arr) { #array_functions-join} +## arrayJoin(arr) {#array_functions-join} A special function. See the section ["ArrayJoin function"](array_join.md#functions_arrayjoin). -## arrayDifference { #arraydifference} +## arrayDifference {#arraydifference} Calculates the difference between adjacent array elements. Returns an array where the first element will be 0, the second is the difference between `a[1] - a[0]`, etc. The type of elements in the resulting array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). @@ -704,7 +704,7 @@ Result: └────────────────────────────────────────────┘ ``` -## arrayDistinct { #arraydistinct} +## arrayDistinct {#arraydistinct} Takes an array, returns an array containing the distinct elements only. @@ -738,7 +738,7 @@ Result: └────────────────────────────────┘ ``` -## arrayEnumerateDense(arr) { #array_functions-arrayenumeratedense} +## arrayEnumerateDense(arr) {#array_functions-arrayenumeratedense} Returns an array of the same size as the source array, indicating where each element first appears in the source array. @@ -754,7 +754,7 @@ SELECT arrayEnumerateDense([10, 20, 10, 30]) └───────────────────────────────────────┘ ``` -## arrayIntersect(arr) { #array_functions-arrayintersect} +## arrayIntersect(arr) {#array_functions-arrayintersect} Takes multiple arrays, returns an array with elements that are present in all source arrays. Elements order in the resulting array is the same as in the first array. @@ -772,7 +772,7 @@ SELECT └──────────────┴───────────┘ ``` -## arrayReduce(agg_func, arr1, ...) { #array_functions-arrayreduce} +## arrayReduce(agg_func, arr1, ...) {#array_functions-arrayreduce} Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`. @@ -814,7 +814,7 @@ SELECT arrayReduce('uniqUpTo(3)', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) └─────────────────────────────────────────────────────────────┘ ``` -## arrayReverse(arr) { #array_functions-arrayreverse} +## arrayReverse(arr) {#array_functions-arrayreverse} Returns an array of the same size as the original array containing the elements in reverse order. @@ -830,11 +830,11 @@ SELECT arrayReverse([1, 2, 3]) └─────────────────────────┘ ``` -## reverse(arr) { #array_functions-reverse} +## reverse(arr) {#array_functions-reverse} Synonym for ["arrayReverse"](#array_functions-arrayreverse) -## arrayFlatten { #arrayflatten} +## arrayFlatten {#arrayflatten} Converts an array of arrays to a flat array. @@ -869,7 +869,7 @@ SELECT flatten([[[1]], [[2], [3]]]) └─────────────────────────────────────────────┘ ``` -## arrayCompact { #arraycompact} +## arrayCompact {#arraycompact} Removes consecutive duplicate elements from an array. The order of result values is determined by the order in the source array. 
@@ -905,7 +905,7 @@ Result: └────────────────────────────────────────────┘ ``` -## arrayZip { #arrayzip} +## arrayZip {#arrayzip} Combine multiple Array type columns into one Array[Tuple(...)] column diff --git a/docs/en/query_language/functions/array_join.md b/docs/en/query_language/functions/array_join.md index ee040cf7b8f..5db4b0e601e 100644 --- a/docs/en/query_language/functions/array_join.md +++ b/docs/en/query_language/functions/array_join.md @@ -1,4 +1,4 @@ -# arrayJoin function { #functions_arrayjoin} +# arrayJoin function {#functions_arrayjoin} This is a very unusual function. diff --git a/docs/en/query_language/functions/bit_functions.md b/docs/en/query_language/functions/bit_functions.md index b184f35fdf4..5774375a546 100644 --- a/docs/en/query_language/functions/bit_functions.md +++ b/docs/en/query_language/functions/bit_functions.md @@ -20,7 +20,7 @@ The result type is an integer with bits equal to the maximum bits of its argumen ## bitRotateRight(a, b) -## bitTest { #bittest} +## bitTest {#bittest} Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. @@ -75,7 +75,7 @@ Result: └────────────────┘ ``` -## bitTestAll { #bittestall} +## bitTestAll {#bittestall} Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. @@ -140,7 +140,7 @@ Result: └───────────────────────────────┘ ``` -## bitTestAny { #bittestany} +## bitTestAny {#bittestany} Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. diff --git a/docs/en/query_language/functions/bitmap_functions.md b/docs/en/query_language/functions/bitmap_functions.md index 23bc4b51ffe..6e6edc6aa08 100644 --- a/docs/en/query_language/functions/bitmap_functions.md +++ b/docs/en/query_language/functions/bitmap_functions.md @@ -9,7 +9,7 @@ RoaringBitmap is wrapped into a data structure while actual storage of Bitmap ob For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring). -## bitmapBuild { #bitmap_functions-bitmapbuild} +## bitmapBuild {#bitmap_functions-bitmapbuild} Build a bitmap from unsigned integer array. @@ -56,7 +56,7 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res └─────────────┘ ``` -## bitmapSubsetInRange { #bitmap_functions-bitmapsubsetinrange} +## bitmapSubsetInRange {#bitmap_functions-bitmapsubsetinrange} Return subset in specified range (not include the range_end). @@ -82,7 +82,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11, └───────────────────┘ ``` -## bitmapSubsetLimit { #bitmapsubsetlimit} +## bitmapSubsetLimit {#bitmapsubsetlimit} Creates a subset of bitmap with n elements taken between `range_start` and `cardinality_limit`. @@ -120,7 +120,7 @@ Result: └───────────────────────────┘ ``` -## bitmapContains { #bitmap_functions-bitmapcontains} +## bitmapContains {#bitmap_functions-bitmapcontains} Checks whether the bitmap contains an element. 
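A minimal sketch of `bitmapContains` combined with `bitmapBuild`:

```sql
-- Returns 1 because 9 is present in the bitmap, 0 otherwise.
SELECT bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt32(9)) AS res;
```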
diff --git a/docs/en/query_language/functions/comparison_functions.md b/docs/en/query_language/functions/comparison_functions.md index 7fa30f72443..0c4bf8324f6 100644 --- a/docs/en/query_language/functions/comparison_functions.md +++ b/docs/en/query_language/functions/comparison_functions.md @@ -17,17 +17,17 @@ Strings are compared by bytes. A shorter string is smaller than all strings that Note. Up until version 1.1.54134, signed and unsigned numbers were compared the same way as in C++. In other words, you could get an incorrect result in cases like SELECT 9223372036854775807 > -1. This behavior changed in version 1.1.54134 and is now mathematically correct. -## equals, a = b and a == b operator { #function-equals} +## equals, a = b and a == b operator {#function-equals} -## notEquals, a ! operator= b and a `<>` b { #function-notequals} +## notEquals, a ! operator= b and a `<>` b {#function-notequals} -## less, `< operator` { #function-less} +## less, `< operator` {#function-less} -## greater, `> operator` { #function-greater} +## greater, `> operator` {#function-greater} -## lessOrEquals, `<= operator` { #function-lessorequals} +## lessOrEquals, `<= operator` {#function-lessorequals} -## greaterOrEquals, `>= operator` { #function-greaterorequals} +## greaterOrEquals, `>= operator` {#function-greaterorequals} [Original article](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/) diff --git a/docs/en/query_language/functions/conditional_functions.md b/docs/en/query_language/functions/conditional_functions.md index 301e0013bd5..6822d40bb21 100644 --- a/docs/en/query_language/functions/conditional_functions.md +++ b/docs/en/query_language/functions/conditional_functions.md @@ -1,6 +1,6 @@ # Conditional functions -## if { #if} +## if {#if} Controls conditional branching. Unlike most systems, ClickHouse always evaluate both expressions `then` and `else`. @@ -88,7 +88,7 @@ WHERE isNotNull(left) AND isNotNull(right) ``` Note: `NULL` values are not used in this example, check [NULL values in conditionals](#null-values-in-conditionals) section. -## Ternary Operator { #ternary-operator} +## Ternary Operator {#ternary-operator} It works same as `if` function. diff --git a/docs/en/query_language/functions/date_time_functions.md b/docs/en/query_language/functions/date_time_functions.md index 84366798521..3a3adba38a0 100644 --- a/docs/en/query_language/functions/date_time_functions.md +++ b/docs/en/query_language/functions/date_time_functions.md @@ -62,7 +62,7 @@ Converts a date with time to a UInt8 number containing the number of the minute Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). Leap seconds are not accounted for. -## toUnixTimestamp { #to_unix_timestamp} +## toUnixTimestamp {#to_unix_timestamp} For DateTime argument: converts value to its internal numeric representation (Unix Timestamp). For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp. @@ -342,7 +342,7 @@ SELECT └──────────────────────────┴───────────────────────────────┘ ``` -## dateDiff { #datediff} +## dateDiff {#datediff} Returns the difference between two Date or DateTime values. @@ -401,7 +401,7 @@ For a time interval starting at 'StartTime' and continuing for 'Duration' second For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`. 
This is necessary for searching for pageviews in the corresponding session. -## formatDateTime(Time, Format\[, Timezone\]) { #formatdatetime} +## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetime} Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column. diff --git a/docs/en/query_language/functions/encoding_functions.md b/docs/en/query_language/functions/encoding_functions.md index ed55be7488b..9adb69e2cf6 100644 --- a/docs/en/query_language/functions/encoding_functions.md +++ b/docs/en/query_language/functions/encoding_functions.md @@ -1,6 +1,6 @@ # Encoding functions -## char { #char} +## char {#char} Returns the string with the length as the number of passed arguments and each byte has the value of corresponding argument. Accepts multiple arguments of numeric types. If the value of argument is out of range of UInt8 data type, it is converted to UInt8 with possible rounding and overflow. @@ -60,7 +60,7 @@ Result: └───────┘ ``` -## hex { #hex} +## hex {#hex} Returns a string containing the argument's hexadecimal representation. diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md index a0cf648327a..26e05721b6a 100644 --- a/docs/en/query_language/functions/ext_dict_functions.md +++ b/docs/en/query_language/functions/ext_dict_functions.md @@ -1,4 +1,4 @@ -# Functions for Working with External Dictionaries { #ext_dict_functions} +# Functions for Working with External Dictionaries {#ext_dict_functions} For information on connecting and configuring external dictionaries, see [External dictionaries](../dicts/external_dicts.md). @@ -111,7 +111,7 @@ dictHas('dict_name', id_expr) Type: `UInt8`. -## dictGetHierarchy { #dictgethierarchy} +## dictGetHierarchy {#dictgethierarchy} Creates an array, containing all the parents of a key in the [hierarchical dictionary](../dicts/external_dicts_dict_hierarchical.md). @@ -154,7 +154,7 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) Type: `UInt8`. -## Other functions { #ext_dict_functions-other} +## Other functions {#ext_dict_functions-other} ClickHouse supports specialized functions that convert dictionary attribute values to a specific data type regardless of the dictionary configuration. diff --git a/docs/en/query_language/functions/geo.md b/docs/en/query_language/functions/geo.md index df5fc87cafa..81e43c93ad3 100644 --- a/docs/en/query_language/functions/geo.md +++ b/docs/en/query_language/functions/geo.md @@ -152,7 +152,7 @@ SELECT geohashDecode('ezs42') AS res └─────────────────────────────────┘ ``` -## geoToH3 { #geotoh3} +## geoToH3 {#geotoh3} Returns [H3](https://uber.github.io/h3/#/documentation/overview/introduction) point index `(lon, lat)` with specified resolution. diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index 9bbf86db66a..5c51bf33b20 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -2,7 +2,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elements. -## halfMD5 { #hash_functions-halfmd5} +## halfMD5 {#hash_functions-halfmd5} [Interprets](../../query_language/functions/type_conversion_functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. 
Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. @@ -32,13 +32,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') └────────────────────┴────────┘ ``` -## MD5 { #hash_functions-md5} +## MD5 {#hash_functions-md5} Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16). If you don't need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the 'sipHash128' function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 { #hash_functions-siphash64} +## sipHash64 {#hash_functions-siphash64} Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value. @@ -74,7 +74,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 └──────────────────────┴────────┘ ``` -## sipHash128 { #hash_functions-siphash128} +## sipHash128 {#hash_functions-siphash128} Calculates SipHash from a string. Accepts a String-type argument. Returns FixedString(16). @@ -175,7 +175,7 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 └──────────────────────┴────────┘ ``` -## javaHash { #hash_functions-javahash} +## javaHash {#hash_functions-javahash} Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. @@ -205,7 +205,7 @@ Result: └───────────────────────────┘ ``` -## javaHashUTF16LE { #javahashutf16le} +## javaHashUTF16LE {#javahashutf16le} Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string, assuming it contains bytes representing a string in UTF-16LE encoding. @@ -241,7 +241,7 @@ Result: └──────────────────────────────────────────────────────────────┘ ``` -## hiveHash { #hash_functions-hivehash} +## hiveHash {#hash_functions-hivehash} Calculates `HiveHash` from a string. @@ -391,7 +391,7 @@ SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) └──────────────────┴─────────────────┘ ``` -## xxHash32, xxHash64 { #hash_functions-xxhash32} +## xxHash32, xxHash64 {#hash_functions-xxhash32} Calculates `xxHash` from a string. It is proposed in two flavors, 32 and 64 bits. diff --git a/docs/en/query_language/functions/higher_order_functions.md b/docs/en/query_language/functions/higher_order_functions.md index d5d9f025a11..ef5fd0c398a 100644 --- a/docs/en/query_language/functions/higher_order_functions.md +++ b/docs/en/query_language/functions/higher_order_functions.md @@ -23,7 +23,7 @@ A lambda function can't be omitted for the following functions: - [arrayFirst](#higher_order_functions-array-first) - [arrayFirstIndex](#higher_order_functions-array-first-index) -### arrayMap(func, arr1, ...) { #higher_order_functions-array-map} +### arrayMap(func, arr1, ...) {#higher_order_functions-array-map} Returns an array obtained from the original application of the `func` function to each element in the `arr` array. @@ -50,7 +50,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res Note that the first argument (lambda function) can't be omitted in the `arrayMap` function. -### arrayFilter(func, arr1, ...) 
{ #higher_order_functions-array-filter} +### arrayFilter(func, arr1, ...) {#higher_order_functions-array-filter} Returns an array containing only the elements in `arr1` for which `func` returns something other than 0. @@ -83,7 +83,7 @@ SELECT Note that the first argument (lambda function) can't be omitted in the `arrayFilter` function. -### arrayFill(func, arr1, ...) { #higher_order_functions-array-fill} +### arrayFill(func, arr1, ...) {#higher_order_functions-array-fill} Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func` returns 0. The first element of `arr1` will not be replaced. @@ -101,7 +101,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Note that the first argument (lambda function) can't be omitted in the `arrayFill` function. -### arrayReverseFill(func, arr1, ...) { #higher_order_functions-array-reverse-fill} +### arrayReverseFill(func, arr1, ...) {#higher_order_functions-array-reverse-fill} Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func` returns 0. The last element of `arr1` will not be replaced. @@ -119,7 +119,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Note that the first argument (lambda function) can't be omitted in the `arrayReverseFill` function. -### arraySplit(func, arr1, ...) { #higher_order_functions-array-split} +### arraySplit(func, arr1, ...) {#higher_order_functions-array-split} Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. @@ -137,7 +137,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the first argument (lambda function) can't be omitted in the `arraySplit` function. -### arrayReverseSplit(func, arr1, ...) { #higher_order_functions-array-reverse-split} +### arrayReverseSplit(func, arr1, ...) {#higher_order_functions-array-reverse-split} Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. @@ -155,7 +155,7 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the first argument (lambda function) can't be omitted in the `arraySplit` function. -### arrayCount(\[func,\] arr1, ...) { #higher_order_functions-array-count} +### arrayCount(\[func,\] arr1, ...) {#higher_order_functions-array-count} Returns the number of elements in the arr array for which func returns something other than 0. If 'func' is not specified, it returns the number of non-zero elements in the array. @@ -167,17 +167,17 @@ Returns 1 if there is at least one element in 'arr' for which 'func' returns som Returns 1 if 'func' returns something other than 0 for all the elements in 'arr'. Otherwise, it returns 0. -### arraySum(\[func,\] arr1, ...) { #higher_order_functions-array-sum} +### arraySum(\[func,\] arr1, ...) {#higher_order_functions-array-sum} Returns the sum of the 'func' values. If the function is omitted, it just returns the sum of the array elements. -### arrayFirst(func, arr1, ...) { #higher_order_functions-array-first} +### arrayFirst(func, arr1, ...) {#higher_order_functions-array-first} Returns the first element in the 'arr1' array for which 'func' returns something other than 0. 
Note that the first argument (lambda function) can't be omitted in the `arrayFirst` function. -### arrayFirstIndex(func, arr1, ...) { #higher_order_functions-array-first-index} +### arrayFirstIndex(func, arr1, ...) {#higher_order_functions-array-first-index} Returns the index of the first element in the 'arr1' array for which 'func' returns something other than 0. diff --git a/docs/en/query_language/functions/in_functions.md b/docs/en/query_language/functions/in_functions.md index c9473820ae8..287d401a0a5 100644 --- a/docs/en/query_language/functions/in_functions.md +++ b/docs/en/query_language/functions/in_functions.md @@ -1,6 +1,6 @@ # Functions for implementing the IN operator -## in, notIn, globalIn, globalNotIn { #in-functions} +## in, notIn, globalIn, globalNotIn {#in-functions} See the section [IN operators](../select.md#select-in-operators). diff --git a/docs/en/query_language/functions/introspection.md b/docs/en/query_language/functions/introspection.md index 76f6fade169..520c89feaeb 100644 --- a/docs/en/query_language/functions/introspection.md +++ b/docs/en/query_language/functions/introspection.md @@ -14,7 +14,7 @@ For proper operation of introspection functions: ClickHouse saves profiler reports to the [trace_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly. -## addressToLine { #addresstoline} +## addressToLine {#addresstoline} Converts virtual memory address inside ClickHouse server process to the filename and the line number in ClickHouse source code. @@ -104,7 +104,7 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97 ``` -## addressToSymbol { #addresstosymbol} +## addressToSymbol {#addresstosymbol} Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files. @@ -201,7 +201,7 @@ start_thread clone ``` -## demangle { #demangle} +## demangle {#demangle} Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name. diff --git a/docs/en/query_language/functions/machine_learning_functions.md b/docs/en/query_language/functions/machine_learning_functions.md index 498f6ff9994..5d9983f015f 100644 --- a/docs/en/query_language/functions/machine_learning_functions.md +++ b/docs/en/query_language/functions/machine_learning_functions.md @@ -1,6 +1,6 @@ # Machine learning functions -## evalMLMethod (prediction) { #machine_learning_methods-evalmlmethod} +## evalMLMethod (prediction) {#machine_learning_methods-evalmlmethod} Prediction using fitted regression models uses `evalMLMethod` function. See link in `linearRegression`. diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index ad0d4ff33fd..24b6906b57e 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -4,7 +4,7 @@ Returns a string with the name of the host that this function was performed on. For distributed processing, this is the name of the remote server host, if the function is performed on a remote server. -## FQDN { #fqdn} +## FQDN {#fqdn} Returns the fully qualified domain name. @@ -109,7 +109,7 @@ Returns a string containing the type name of the passed argument. 
If `NULL` is passed to the function as input, then it returns the `Nullable(Nothing)` type, which corresponds to an internal `NULL` representation in ClickHouse. -## blockSize() { #function-blocksize} +## blockSize() {#function-blocksize} Gets the size of the block. In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. @@ -137,7 +137,7 @@ Sleeps 'seconds' seconds on each row. You can specify an integer or a floating-p Returns the name of the current database. You can use this function in table engine parameters in a CREATE TABLE query where you need to specify the database. -## currentUser() { #other_function-currentuser} +## currentUser() {#other_function-currentuser} Returns the login of current user. Login of user, that initiated query, will be returned in case distibuted query. @@ -178,7 +178,7 @@ Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is not Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is infinite, otherwise 0. Note that 0 is returned for a NaN. -## ifNotFinite { #ifnotfinite} +## ifNotFinite {#ifnotfinite} Checks whether floating point value is finite. @@ -225,7 +225,7 @@ Accepts constant strings: database name, table name, and column name. Returns a The function throws an exception if the table does not exist. For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0. -## bar { #function-bar} +## bar {#function-bar} Allows building a unicode-art diagram. @@ -408,7 +408,7 @@ Returns the timezone of the server. Returns the sequence number of the data block where the row is located. -## rowNumberInBlock { #function-rownumberinblock} +## rowNumberInBlock {#function-rownumberinblock} Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. @@ -416,7 +416,7 @@ Returns the ordinal number of the row in the data block. Different data blocks a Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. -## neighbor { #neighbor} +## neighbor {#neighbor} The window function that provides access to a row at a specified offset which comes before or after the current row of a given column. @@ -527,7 +527,7 @@ Result: └────────────┴───────┴───────────┴────────────────┘ ``` -## runningDifference(x) { #other_functions-runningdifference} +## runningDifference(x) {#other_functions-runningdifference} Calculates the difference between successive row values ​​in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. @@ -772,7 +772,7 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) ``` -## replicate { #other_functions-replicate} +## replicate {#other_functions-replicate} Creates an array with a single value. @@ -809,7 +809,7 @@ Result: └───────────────────────────────┘ ``` -## filesystemAvailable { #filesystemavailable} +## filesystemAvailable {#filesystemavailable} Returns amount of remaining space on the filesystem where the files of the databases located. It is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for OS. 
@@ -841,7 +841,7 @@ Result: └─────────────────┴────────┘ ``` -## filesystemFree { #filesystemfree} +## filesystemFree {#filesystemfree} Returns total amount of the free space on the filesystem where the files of the databases located. See also `filesystemAvailable` @@ -873,7 +873,7 @@ Result: └────────────┴────────┘ ``` -## filesystemCapacity { #filesystemcapacity} +## filesystemCapacity {#filesystemcapacity} Returns the capacity of the filesystem in bytes. For evaluation, the [path](../../operations/server_settings/settings.md#server_settings-path) to the data directory must be configured. @@ -905,17 +905,17 @@ Result: └───────────┴────────┘ ``` -## finalizeAggregation { #function-finalizeaggregation} +## finalizeAggregation {#function-finalizeaggregation} Takes state of aggregate function. Returns result of aggregation (finalized state). -## runningAccumulate { #function-runningaccumulate} +## runningAccumulate {#function-runningaccumulate} Takes the states of the aggregate function and returns a column with values, are the result of the accumulation of these states for a set of block lines, from the first to the current line. For example, takes state of aggregate function (example runningAccumulate(uniqState(UserID))), and for each row of block, return result of aggregate function on merge of states of all previous rows and current row. So, result of function depends on partition of data to blocks and on order of data in block. -## joinGet { #joinget} +## joinGet {#joinget} The function lets you extract data from the table the same way as from a [dictionary](../../query_language/dicts/index.md). @@ -978,7 +978,7 @@ Result: └──────────────────────────────────────────────────┘ ``` -## modelEvaluate(model_name, ...) { #function-modelevaluate} +## modelEvaluate(model_name, ...) {#function-modelevaluate} Evaluate external model. Accepts a model name and model arguments. Returns Float64. @@ -995,7 +995,7 @@ SELECT throwIf(number = 3, 'Too many') FROM numbers(10); Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many. ``` -## identity { #identity} +## identity {#identity} Returns the same value that was used as its argument. Used for debugging and testing, allows to cancel using index, and get the query performance of a full scan. When query is analyzed for possible use of index, the analyzer doesn't look inside `identity` functions. @@ -1021,7 +1021,7 @@ Result: └──────────────┘ ``` -## randomPrintableASCII { #randomascii} +## randomPrintableASCII {#randomascii} Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 6e8bb1635d5..740ae3112fb 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -20,7 +20,7 @@ Returns the smallest round number that is greater than or equal to `x`. In every Returns the round number with largest absolute value that has an absolute value less than or equal to `x`'s. In every other way, it is the same as the 'floor' function (see above). -## round(x\[, N\]) { #rounding_functions-round} +## round(x\[, N\]) {#rounding_functions-round} Rounds a value to a specified number of decimal places. 
@@ -82,7 +82,7 @@ round(3.65, 1) = 3.6 - [roundBankers](#roundbankers) -## roundBankers { #roundbankers} +## roundBankers {#roundbankers} Rounds a number to a specified decimal position. diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 0fc305363ba..3d8beae6800 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -1,6 +1,6 @@ # Functions for working with strings -## empty { #string_functions-empty} +## empty {#string_functions-empty} Returns 1 for an empty string or 0 for a non-empty string. The result type is UInt8. @@ -85,7 +85,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') └───────────────────────┘ ``` -## repeat { #repeat} +## repeat {#repeat} Repeats a string as many times as specified and concatenates the replicated values as a single string. @@ -151,7 +151,7 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat { #concat} +## concat {#concat} Concatenates the strings listed in the arguments, without a separator. @@ -187,7 +187,7 @@ Result: └─────────────────────────────┘ ``` -## concatAssumeInjective { #concatassumeinjective} +## concatAssumeInjective {#concatassumeinjective} Same as [concat](#concat), the difference is that you need to ensure that `concat(s1, s2, ...) → sn` is injective, it will be used for optimization of GROUP BY. @@ -253,11 +253,11 @@ Decode base64-encoded string 's' into original string. In case of failure raises ## tryBase64Decode(s) Similar to base64Decode, but in case of error an empty string would be returned. -## endsWith(s, suffix) { #function-endswith} +## endsWith(s, suffix) {#function-endswith} Returns whether to end with the specified suffix. Returns 1 if the string ends with the specified suffix, otherwise it returns 0. -## startsWith(str, prefix) { #function-startswith} +## startsWith(str, prefix) {#function-startswith} Returns 1 whether string starts with the specified prefix, otherwise it returns 0. @@ -286,7 +286,7 @@ Result: └───────────────────────────────────┘ ``` -## trim { #trim} +## trim {#trim} Removes all specified characters from the start or end of a string. By default removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. @@ -324,7 +324,7 @@ Result: └───────────────────────────────────────────────┘ ``` -## trimLeft { #trimleft} +## trimLeft {#trimleft} Removes all consecutive occurrences of common whitespace (ASCII character 32) from the beginning of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). @@ -362,7 +362,7 @@ Result: └─────────────────────────────────────┘ ``` -## trimRight { #trimright} +## trimRight {#trimright} Removes all consecutive occurrences of common whitespace (ASCII character 32) from the end of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). @@ -400,7 +400,7 @@ Result: └──────────────────────────────────────┘ ``` -## trimBoth { #trimboth} +## trimBoth {#trimboth} Removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). 
diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 724d58ddf86..b3db647a1b5 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -2,7 +2,7 @@ The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. -## position(haystack, needle), locate(haystack, needle) { #position} +## position(haystack, needle), locate(haystack, needle) {#position} Returns the position (in bytes) of the found substring in the string, starting from 1. @@ -64,7 +64,7 @@ Result: └───────────────────────────────┘ ``` -## positionCaseInsensitive { #positioncaseinsensitive} +## positionCaseInsensitive {#positioncaseinsensitive} The same as [position](#position) returns the position (in bytes) of the found substring in the string, starting from 1. Use the function for a case-insensitive search. @@ -104,7 +104,7 @@ Result: └───────────────────────────────────────────────────┘ ``` -## positionUTF8 { #positionutf8} +## positionUTF8 {#positionutf8} Returns the position (in Unicode points) of the found substring in the string, starting from 1. @@ -178,7 +178,7 @@ Result: └────────────────────────────────────────┘ ``` -## positionCaseInsensitiveUTF8 { #positioncaseinsensitiveutf8} +## positionCaseInsensitiveUTF8 {#positioncaseinsensitiveutf8} The same as [positionUTF8](#positionutf8), but is case-insensitive. Returns the position (in Unicode points) of the found substring in the string, starting from 1. @@ -218,7 +218,7 @@ Result: └────────────────────────────────────────────────────┘ ``` -## multiSearchAllPositions { #multiSearchAllPositions} +## multiSearchAllPositions {#multiSearchAllPositions} The same as [position](string_search_functions.md#position) but returns `Array` of positions (in bytes) of the found corresponding substrings in the string. Positions are indexed starting from 1. @@ -259,11 +259,11 @@ Result: └───────────────────────────────────────────────────────────────────┘ ``` -## multiSearchAllPositionsUTF8 { #multiSearchAllPositionsUTF8} +## multiSearchAllPositionsUTF8 {#multiSearchAllPositionsUTF8} See `multiSearchAllPositions`. -## multiSearchFirstPosition(haystack, [needle1, needle2, ..., needlen]) { #multiSearchFirstPosition} +## multiSearchFirstPosition(haystack, [needle1, needle2, ..., needlen]) {#multiSearchFirstPosition} The same as `position` but returns the leftmost offset of the string `haystack` that is matched to some of the needles. @@ -275,7 +275,7 @@ Returns the index `i` (starting from 1) of the leftmost found needlei For a case-insensitive search or/and in UTF-8 format use functions `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`. -## multiSearchAny(haystack, [needle1, needle2, ..., needlen]) { #function-multisearchany} +## multiSearchAny(haystack, [needle1, needle2, ..., needlen]) {#function-multisearchany} Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. @@ -336,7 +336,7 @@ Extracts a fragment of a string using a regular expression. If 'haystack' doesn' Extracts all the fragments of a string using a regular expression. If 'haystack' doesn't match the 'pattern' regex, an empty string is returned. Returns an array of strings consisting of all matches to the regex. 
In general, the behavior is the same as the 'extract' function (it takes the first subpattern, or the entire expression if there isn't a subpattern). -## like(haystack, pattern), haystack LIKE pattern operator { #function-like} +## like(haystack, pattern), haystack LIKE pattern operator {#function-like} Checks whether a string matches a simple regular expression. The regular expression can contain the metasymbols `%` and `_`. @@ -350,7 +350,7 @@ Use the backslash (`\`) for escaping metasymbols. See the note on escaping in th For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function. For other regular expressions, the code is the same as for the 'match' function. -## notLike(haystack, pattern), haystack NOT LIKE pattern operator { #function-notlike} +## notLike(haystack, pattern), haystack NOT LIKE pattern operator {#function-notlike} The same thing as 'like', but negative. diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index a4ce9467cec..ffb757c17a2 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -1,6 +1,6 @@ # Type Conversion Functions -## Common Issues of Numeric Conversions { #numeric-conversion-issues} +## Common Issues of Numeric Conversions {#numeric-conversion-issues} When you convert a value from one to another data type, you should remember that in common case, it is an unsafe operation that can lead to a data loss. A data loss can occur if you try to fit value from a larger data type to a smaller data type, or if you convert values between different data types. @@ -297,7 +297,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. -## reinterpretAsString { #type_conversion_functions-reinterpretAsString} +## reinterpretAsString {#type_conversion_functions-reinterpretAsString} This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. @@ -305,7 +305,7 @@ This function accepts a number or date or date with time, and returns a string c This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. -## CAST(x, t) { #type_conversion_function-cast} +## CAST(x, t) {#type_conversion_function-cast} Converts 'x' to the 't' data type. The syntax CAST(x AS t) is also supported. 
@@ -349,7 +349,7 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null └─────────────────────────────────────────┘ ``` -## toInterval(Year|Quarter|Month|Week|Day|Hour|Minute|Second) { #function-tointerval} +## toInterval(Year|Quarter|Month|Week|Day|Hour|Minute|Second) {#function-tointerval} Converts a Number type argument to an [Interval](../../data_types/special_data_types/interval.md) data type. @@ -392,7 +392,7 @@ SELECT └───────────────────────────┴──────────────────────────────┘ ``` -## parseDateTimeBestEffort { #type_conversion_functions-parsedatetimebesteffort} +## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort} Parse a number type argument to a Date or DateTime type. different from toDate and toDateTime, parseDateTimeBestEffort can progress more complex date format. diff --git a/docs/en/query_language/functions/uuid_functions.md b/docs/en/query_language/functions/uuid_functions.md index 4e3752f8cc6..af8ba7f84f2 100644 --- a/docs/en/query_language/functions/uuid_functions.md +++ b/docs/en/query_language/functions/uuid_functions.md @@ -2,7 +2,7 @@ The functions for working with UUID are listed below. -## generateUUIDv4 { #uuid_function-generate} +## generateUUIDv4 {#uuid_function-generate} Generates the [UUID](../../data_types/uuid.md) of [version 4](https://tools.ietf.org/html/rfc4122#section-4.4). diff --git a/docs/en/query_language/insert_into.md b/docs/en/query_language/insert_into.md index e2a6ff3f51b..e2bf226c298 100644 --- a/docs/en/query_language/insert_into.md +++ b/docs/en/query_language/insert_into.md @@ -44,7 +44,7 @@ You can insert data separately from the query by using the command-line client o If table has [constraints](create.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped. -### Inserting The Results of `SELECT` { #insert_query_insert-select} +### Inserting The Results of `SELECT` {#insert_query_insert-select} ```sql INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md index 74514c14f12..cbdbf0318f8 100644 --- a/docs/en/query_language/misc.md +++ b/docs/en/query_language/misc.md @@ -54,7 +54,7 @@ If the table is corrupted, you can copy the non-corrupted data to another table. 3. Execute the query `INSERT INTO SELECT * FROM `. This request copies the non-corrupted data from the damaged table to another table. Only the data before the corrupted part will be copied. 4. Restart the `clickhouse-client` to reset the `max_threads` value. -## DESCRIBE TABLE { #misc-describe-table} +## DESCRIBE TABLE {#misc-describe-table} ```sql DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] @@ -151,7 +151,7 @@ The response contains the `kill_status` column, which can take the following val A test query (`TEST`) only checks the user's rights and displays a list of queries to stop. -## KILL MUTATION { #kill-mutation} +## KILL MUTATION {#kill-mutation} ```sql KILL MUTATION [ON CLUSTER cluster] @@ -178,7 +178,7 @@ The query is useful when a mutation is stuck and cannot finish (e.g. if some fun Changes already made by the mutation are not rolled back. 
-## OPTIMIZE { #misc_operations-optimize} +## OPTIMIZE {#misc_operations-optimize} ```sql OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE] @@ -198,7 +198,7 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../operations/table_engin !!! warning "Warning" `OPTIMIZE` can't fix the "Too many parts" error. -## RENAME { #misc_operations-rename} +## RENAME {#misc_operations-rename} Renames one or more tables. @@ -208,7 +208,7 @@ RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). -## SET { #query-set} +## SET {#query-set} ```sql SET param = value diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md index a7c4b40c6d9..3c280a3a399 100644 --- a/docs/en/query_language/operators.md +++ b/docs/en/query_language/operators.md @@ -65,9 +65,9 @@ Groups of operators are listed in order of priority (the higher it is in the lis `a GLOBAL NOT IN ...` – The `globalNotIn(a, b)` function. -## Operators for Working with Dates and Times { #operators-datetime} +## Operators for Working with Dates and Times {#operators-datetime} -### EXTRACT { #operator-extract} +### EXTRACT {#operator-extract} ```sql EXTRACT(part FROM date); @@ -129,7 +129,7 @@ FROM test.Orders; You can see more examples in [tests](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00619_extract.sql). -### INTERVAL { #operator-interval} +### INTERVAL {#operator-interval} Creates an [Interval](../data_types/special_data_types/interval.md)-type value that should be used in arithmetical operations with [Date](../data_types/date.md) and [DateTime](../data_types/datetime.md)-type values. @@ -182,7 +182,7 @@ Note: The conditional operator calculates the values of b and c, then checks whether condition a is met, and then returns the corresponding value. If `b` or `C` is an [arrayJoin()](functions/array_join.md#functions_arrayjoin) function, each row will be replicated regardless of the "a" condition. -## Conditional Expression { #operator_case} +## Conditional Expression {#operator_case} ```sql CASE [x] @@ -227,7 +227,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. -### IS NULL { #operator-is-null} +### IS NULL {#operator-is-null} - For [Nullable](../data_types/nullable.md) type values, the `IS NULL` operator returns: - `1`, if the value is `NULL`. diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index 8a6f9e4a4e1..6a19444f44e 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -93,7 +93,7 @@ FROM ``` -### FROM Clause { #select-from} +### FROM Clause {#select-from} If the FROM clause is omitted, data will be read from the `system.one` table. The `system.one` table contains exactly one row (this table fulfills the same purpose as the DUAL table found in other DBMSs). @@ -112,7 +112,7 @@ In contrast to standard SQL, a synonym does not need to be specified after a sub To execute a query, all the columns listed in the query are extracted from the appropriate table. Any columns not needed for the external query are thrown out of the subqueries. 
If a query does not list any columns (for example, `SELECT count() FROM t`), some column is extracted from the table anyway (the smallest one is preferred), in order to calculate the number of rows. -#### FINAL Modifier { #select-from-final} +#### FINAL Modifier {#select-from-final} Applicable when selecting data from tables from the [MergeTree](../operations/table_engines/mergetree.md)-engine family other than `GraphiteMergeTree`. When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. @@ -127,7 +127,7 @@ Queries that use `FINAL` are executed not as fast as similar queries that don't, In most cases, avoid using `FINAL`. -### SAMPLE Clause { #select-sample-clause} +### SAMPLE Clause {#select-sample-clause} The `SAMPLE` clause allows for approximated query processing. @@ -157,7 +157,7 @@ For the `SAMPLE` clause the following syntax is supported: | `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1.
    The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | -#### SAMPLE k { #select-sample-k} +#### SAMPLE k {#select-sample-k} Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`. @@ -177,7 +177,7 @@ ORDER BY PageViews DESC LIMIT 1000 In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10. -#### SAMPLE n { #select-sample-n} +#### SAMPLE n {#select-sample-n} Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`. @@ -213,7 +213,7 @@ FROM visits SAMPLE 10000000 ``` -#### SAMPLE k OFFSET m { #select-sample-offset} +#### SAMPLE k OFFSET m {#select-sample-offset} Here `k` and `m` are numbers from 0 to 1. Examples are shown below. @@ -237,7 +237,7 @@ Here, a sample of 10% is taken from the second half of the data. `[----------++--------]` -### ARRAY JOIN Clause { #select-array-join-clause} +### ARRAY JOIN Clause {#select-array-join-clause} Allows executing `JOIN` with an array or nested data structure. The intent is similar to the [arrayJoin](functions/array_join.md#functions_arrayjoin) function, but its functionality is broader. @@ -504,7 +504,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; └───────┴─────┴─────┴─────────┴────────────┴─────┘ ``` -### JOIN Clause { #select-join} +### JOIN Clause {#select-join} Joins the data in the normal [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) sense. @@ -520,7 +520,7 @@ FROM The table names can be specified instead of `` and ``. This is equivalent to the `SELECT * FROM table` subquery, except in a special case when the table has the [Join](../operations/table_engines/join.md) engine – an array prepared for joining. -#### Supported Types of `JOIN` { #select-join-types} +#### Supported Types of `JOIN` {#select-join-types} - `INNER JOIN` (or `JOIN`) - `LEFT JOIN` (or `LEFT OUTER JOIN`) @@ -552,7 +552,7 @@ Don't mix these syntaxes. ClickHouse doesn't directly support syntax with commas, so we don't recommend using them. The algorithm tries to rewrite the query in terms of `CROSS JOIN` and `INNER JOIN` clauses and then proceeds to query processing. When rewriting the query, ClickHouse tries to optimize performance and memory consumption. By default, ClickHouse treats commas as an `INNER JOIN` clause and converts `INNER JOIN` to `CROSS JOIN` when the algorithm cannot guarantee that `INNER JOIN` returns the required data. -#### Strictness { #select-join-strictness} +#### Strictness {#select-join-strictness} - `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the standard `JOIN` behavior in SQL. - `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of queries with `ANY` and `ALL` keywords are the same. @@ -704,7 +704,7 @@ For `ON`, `WHERE`, and `GROUP BY` clauses: - Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. 
-### WHERE Clause { #select-where} +### WHERE Clause {#select-where} If there is a WHERE clause, it must contain an expression with the UInt8 type. This is usually an expression with comparison and logical operators. This expression will be used for filtering data before all other transformations. @@ -727,7 +727,7 @@ A query may simultaneously specify PREWHERE and WHERE. In this case, PREWHERE pr If the 'optimize_move_to_prewhere' setting is set to 1 and PREWHERE is omitted, the system uses heuristics to automatically move parts of expressions from WHERE to PREWHERE. -### GROUP BY Clause { #select-group-by-clause} +### GROUP BY Clause {#select-group-by-clause} This is one of the most important parts of a column-oriented DBMS. @@ -824,7 +824,7 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all You can use WITH TOTALS in subqueries, including subqueries in the JOIN clause (in this case, the respective total values are combined). -#### GROUP BY in External Memory { #select-group-by-in-external-memory} +#### GROUP BY in External Memory {#select-group-by-in-external-memory} You can enable dumping temporary data to the disk to restrict memory usage during `GROUP BY`. The [max_bytes_before_external_group_by](../operations/settings/settings.md#settings-max_bytes_before_external_group_by) setting determines the threshold RAM consumption for dumping `GROUP BY` temporary data to the file system. If set to 0 (the default), it is disabled. @@ -911,7 +911,7 @@ WHERE and HAVING differ in that WHERE is performed before aggregation (GROUP BY) If aggregation is not performed, HAVING can't be used. -### ORDER BY Clause { #select-order-by} +### ORDER BY Clause {#select-order-by} The ORDER BY clause contains a list of expressions, which can each be assigned DESC or ASC (the sorting direction). If the direction is not specified, ASC is assumed. ASC is sorted in ascending order, and DESC in descending order. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase` @@ -974,7 +974,7 @@ Running a query may use more memory than 'max_bytes_before_external_sort'. For t External sorting works much less effectively than sorting in RAM. -### SELECT Clause { #select-select} +### SELECT Clause {#select-select} [Expressions](syntax.md#syntax-expressions) specified in the `SELECT` clause are calculated after all the operations in the clauses described above are finished. These expressions work as if they apply to separate rows in the result. If expressions in the `SELECT` clause contain aggregate functions, then ClickHouse processes aggregate functions and expressions used as their arguments during the [GROUP BY](#select-group-by-clause) aggregation. @@ -1035,7 +1035,7 @@ In this example, `COLUMNS('a')` returns two columns: `aa` and `ab`. `COLUMNS('c' Columns that matched the `COLUMNS` expression can have different data types. If `COLUMNS` doesn't match any columns and is the only expression in `SELECT`, ClickHouse throws an exception. -### DISTINCT Clause { #select-distinct} +### DISTINCT Clause {#select-distinct} If DISTINCT is specified, only a single row will remain out of all the sets of fully matching rows in the result. The result will be the same as if GROUP BY were specified across all the fields specified in SELECT without aggregate functions. 
But there are several differences from GROUP BY: @@ -1120,7 +1120,7 @@ The structure of results (the number and type of columns) must match for the que Queries that are parts of UNION ALL can't be enclosed in brackets. ORDER BY and LIMIT are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with UNION ALL in a subquery in the FROM clause. -### INTO OUTFILE Clause { #into-outfile-clause} +### INTO OUTFILE Clause {#into-outfile-clause} Add the `INTO OUTFILE filename` clause (where filename is a string literal) to redirect query output to the specified file. In contrast to MySQL, the file is created on the client side. The query will fail if a file with the same filename already exists. @@ -1128,7 +1128,7 @@ This functionality is available in the command-line client and clickhouse-local The default output format is TabSeparated (the same as in the command-line client batch mode). -### FORMAT Clause { #format-clause} +### FORMAT Clause {#format-clause} Specify 'FORMAT format' to get data in any specified format. You can use this for convenience, or for creating dumps. @@ -1138,7 +1138,7 @@ If the FORMAT clause is omitted, the default format is used, which depends on bo When using the command-line client, data is passed to the client in an internal efficient format. The client independently interprets the FORMAT clause of the query and formats the data itself (thus relieving the network and the server from the load). -### IN Operators { #select-in-operators} +### IN Operators {#select-in-operators} The `IN`, `NOT IN`, `GLOBAL IN`, and `GLOBAL NOT IN` operators are covered separately, since their functionality is quite rich. @@ -1237,7 +1237,7 @@ FROM t_null ``` -#### Distributed Subqueries { #select-distributed-subqueries} +#### Distributed Subqueries {#select-distributed-subqueries} There are two options for IN-s with subqueries (similar to JOINs): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. diff --git a/docs/en/query_language/show.md b/docs/en/query_language/show.md index e51a1ef4d18..f6a9cc6865b 100644 --- a/docs/en/query_language/show.md +++ b/docs/en/query_language/show.md @@ -8,7 +8,7 @@ SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [F Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified object. -## SHOW DATABASES { #show-databases} +## SHOW DATABASES {#show-databases} ```sql SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] diff --git a/docs/en/query_language/syntax.md b/docs/en/query_language/syntax.md index ae3ebc54c9d..f73d319e7b8 100644 --- a/docs/en/query_language/syntax.md +++ b/docs/en/query_language/syntax.md @@ -27,7 +27,7 @@ SQL-style and C-style comments are supported. SQL-style comments: from `--` to the end of the line. The space after `--` can be omitted. Comments in C-style: from `/*` to `*/`. These comments can be multiline. Spaces are not required here, either. -## Keywords { #syntax-keywords} +## Keywords {#syntax-keywords} Keywords are case-insensitive when they correspond to: @@ -40,7 +40,7 @@ In contrast to standard SQL all other keywords (including functions names) are * Keywords are not reserved (they are just parsed as keywords in the corresponding context). If you use [identifiers](#syntax-identifiers) the same as the keywords, enclose them into quotes. 
For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. -## Identifiers { #syntax-identifiers} +## Identifiers {#syntax-identifiers} Identifiers are: @@ -75,7 +75,7 @@ For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more inf Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. -### String { #syntax-string-literal} +### String {#syntax-string-literal} Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. This means that you can use the sequences `\'`and`\\`. The value will have the [String](../data_types/string.md) type. @@ -88,7 +88,7 @@ Actually, these are not literals, but expressions with the array creation operat An array must consist of at least one item, and a tuple must have at least two items. Tuples have a special purpose for use in the `IN` clause of a `SELECT` query. Tuples can be obtained as the result of a query, but they can't be saved to a database (with the exception of [Memory](../operations/table_engines/memory.md) tables). -### NULL { #null-literal} +### NULL {#null-literal} Indicates that the value is missing. @@ -115,7 +115,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an arguments list in brackets. For more information, see the sections "Data types," "Table engines," and "CREATE". -## Expression Aliases { #syntax-expression_aliases} +## Expression Aliases {#syntax-expression_aliases} An alias is a user-defined name for an expression in a query. @@ -173,7 +173,7 @@ In this example, we declared table `t` with column `b`. Then, when selecting dat In a `SELECT` query, an asterisk can replace the expression. For more information, see the section "SELECT". -## Expressions { #syntax-expressions} +## Expressions {#syntax-expressions} An expression is a function, identifier, literal, application of an operator, expression in brackets, subquery, or asterisk. It can also contain an alias. A list of expressions is one or more expressions separated by commas. diff --git a/docs/en/query_language/system.md b/docs/en/query_language/system.md index 7408f4d883b..6bff8381f0e 100644 --- a/docs/en/query_language/system.md +++ b/docs/en/query_language/system.md @@ -1,4 +1,4 @@ -# SYSTEM Queries { #query_language-system} +# SYSTEM Queries {#query_language-system} - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) @@ -14,13 +14,13 @@ - [STOP MERGES](#query_language-system-stop-merges) - [START MERGES](#query_language-system-start-merges) -## RELOAD DICTIONARIES { #query_language-system-reload-dictionaries} +## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Reloads all dictionaries that have been successfully loaded before. 
By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../operations/server_settings/settings.md#server_settings-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). Always returns `Ok.` regardless of the result of the dictionary update. -## RELOAD DICTIONARY dictionary_name { #query_language-system-reload-dictionary} +## RELOAD DICTIONARY dictionary_name {#query_language-system-reload-dictionary} Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT_LOADED / FAILED). Always returns `Ok.` regardless of the result of updating the dictionary. @@ -30,38 +30,38 @@ The status of the dictionary can be checked by querying the `system.dictionaries SELECT name, status FROM system.dictionaries; ``` -## DROP DNS CACHE { #query_language-system-drop-dns-cache} +## DROP DNS CACHE {#query_language-system-drop-dns-cache} Resets ClickHouse's internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters. -## DROP MARK CACHE { #query_language-system-drop-mark-cache} +## DROP MARK CACHE {#query_language-system-drop-mark-cache} Resets the mark cache. Used in development of ClickHouse and performance tests. -## FLUSH LOGS { #query_language-system-flush_logs} +## FLUSH LOGS {#query_language-system-flush_logs} Flushes buffers of log messages to system tables (e.g. system.query_log). Allows you to not wait 7.5 seconds when debugging. -## RELOAD CONFIG { #query_language-system-reload-config} +## RELOAD CONFIG {#query_language-system-reload-config} Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeeper. -## SHUTDOWN { #query_language-system-shutdown} +## SHUTDOWN {#query_language-system-shutdown} Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`) -## KILL { #query_language-system-kill} +## KILL {#query_language-system-kill} Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`) -## Managing Distributed Tables { #query_language-system-distributed} +## Managing Distributed Tables {#query_language-system-distributed} ClickHouse can manage [distributed](../operations/table_engines/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the `insert_distributed_sync` setting. -### STOP DISTRIBUTED SENDS { #query_language-system-stop-distributed-sends} +### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends} Disables background data distribution when inserting data into distributed tables. @@ -70,7 +70,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.] 
``` -### FLUSH DISTRIBUTED { #query_language-system-flush-distributed} +### FLUSH DISTRIBUTED {#query_language-system-flush-distributed} Forces ClickHouse to send data to cluster nodes synchronously. If any nodes are unavailable, ClickHouse throws an exception and stops query execution. You can retry the query until it succeeds, which will happen when all nodes are back online. @@ -79,7 +79,7 @@ SYSTEM FLUSH DISTRIBUTED [db.] ``` -### START DISTRIBUTED SENDS { #query_language-system-start-distributed-sends} +### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends} Enables background data distribution when inserting data into distributed tables. @@ -88,7 +88,7 @@ SYSTEM START DISTRIBUTED SENDS [db.] ``` -### STOP MERGES { #query_language-system-stop-merges} +### STOP MERGES {#query_language-system-stop-merges} Provides possibility to stop background merges for tables in the MergeTree family: @@ -99,7 +99,7 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name] `DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. -### START MERGES { #query_language-system-start-merges} +### START MERGES {#query_language-system-start-merges} Provides possibility to start background merges for tables in the MergeTree family: diff --git a/docs/en/query_language/table_functions/jdbc.md b/docs/en/query_language/table_functions/jdbc.md index 6f93cb2819f..97f3b5af666 100644 --- a/docs/en/query_language/table_functions/jdbc.md +++ b/docs/en/query_language/table_functions/jdbc.md @@ -1,4 +1,4 @@ -# jdbc { #table_function-jdbc} +# jdbc {#table_function-jdbc} `jdbc(jdbc_connection_uri, schema, table)` - returns table that is connected via JDBC driver. diff --git a/docs/en/query_language/table_functions/odbc.md b/docs/en/query_language/table_functions/odbc.md index 17afd91a22c..d9115557f1e 100644 --- a/docs/en/query_language/table_functions/odbc.md +++ b/docs/en/query_language/table_functions/odbc.md @@ -1,4 +1,4 @@ -# odbc { #table_functions-odbc} +# odbc {#table_functions-odbc} Returns table that is connected via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). 
diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index a0a607dc0b2..695d0175685 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse \ No newline at end of file +ClickHouse From 38b2dec3544b202654133355875c286748141578 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 18 Mar 2020 21:54:27 +0300 Subject: [PATCH 076/115] Arcadia (#9729) * Move getFQDNOrHostName to base/common/ * Add argsToConfig to ya.make * Add coverage.cpp to ya.make Also remove WITH_COVERAGE from config file --- CMakeLists.txt | 2 ++ base/common/CMakeLists.txt | 21 ++++++++++--------- base/common/argsToConfig.cpp | 2 +- base/common/argsToConfig.h | 1 + base/common/config_common.h.in | 1 - base/common/coverage.cpp | 21 ++++++++++--------- .../common}/getFQDNOrHostName.cpp | 2 +- .../common}/getFQDNOrHostName.h | 0 base/common/ya.make | 12 +++++++++++ base/daemon/GraphiteWriter.cpp | 2 +- base/ya.make | 4 +++- dbms/programs/copier/Internals.h | 2 +- .../performance-test/ReportBuilder.cpp | 2 +- dbms/programs/server/HTTPHandler.cpp | 2 +- dbms/programs/server/MySQLHandler.h | 2 +- dbms/programs/server/Server.cpp | 2 +- dbms/programs/server/TCPHandler.h | 2 +- .../src/Client/ConnectionPoolWithFailover.cpp | 2 +- dbms/src/Functions/FunctionFQDN.cpp | 2 +- dbms/src/Interpreters/ClientInfo.cpp | 2 +- dbms/src/Interpreters/DDLWorker.cpp | 2 +- 21 files changed, 53 insertions(+), 35 deletions(-) rename {dbms/src/Common => base/common}/getFQDNOrHostName.cpp (91%) rename {dbms/src/Common => base/common}/getFQDNOrHostName.h (100%) create mode 100644 base/common/ya.make diff --git a/CMakeLists.txt b/CMakeLists.txt index 08b3ed9fdf4..16993fdd9c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,11 +186,13 @@ if (COMPILER_GCC OR COMPILER_CLANG) endif () option(WITH_COVERAGE "Build with coverage." 0) + if(WITH_COVERAGE AND COMPILER_CLANG) set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") # If we want to disable coverage for specific translation units set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping") endif() + if(WITH_COVERAGE AND COMPILER_GCC) set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage") set(COVERAGE_OPTION "-lgcov") diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt index 3e831d3ee82..41e99768953 100644 --- a/base/common/CMakeLists.txt +++ b/base/common/CMakeLists.txt @@ -6,6 +6,7 @@ set (SRCS DateLUT.cpp DateLUTImpl.cpp demangle.cpp + getFQDNOrHostName.cpp getMemoryAmount.cpp getThreadId.cpp JSON.cpp @@ -20,15 +21,9 @@ set (SRCS ) if (ENABLE_REPLXX) - set (SRCS ${SRCS} - ReplxxLineReader.cpp - ReplxxLineReader.h - ) + list (APPEND SRCS ReplxxLineReader.cpp) elseif (ENABLE_READLINE) - set (SRCS ${SRCS} - ReadlineLineReader.cpp - ReadlineLineReader.h - ) + list (APPEND SRCS ReadlineLineReader.cpp) endif () if (USE_DEBUG_HELPERS) @@ -38,6 +33,12 @@ endif () add_library (common ${SRCS}) +if (WITH_COVERAGE) + target_compile_definitions(common PUBLIC WITH_COVERAGE=1) +else () + target_compile_definitions(common PUBLIC WITH_COVERAGE=0) +endif () + target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..) 
if(CCTZ_INCLUDE_DIR) @@ -56,8 +57,6 @@ if(CCTZ_LIBRARY) target_link_libraries(common PRIVATE ${CCTZ_LIBRARY}) endif() -target_link_libraries(common PUBLIC replxx) - # allow explicitly fallback to readline if (NOT ENABLE_REPLXX AND ENABLE_READLINE) message (STATUS "Attempt to fallback to readline explicitly") @@ -82,11 +81,13 @@ endif () target_link_libraries (common PUBLIC + ${Poco_Net_LIBRARY} ${Poco_Util_LIBRARY} ${Poco_Foundation_LIBRARY} ${CITYHASH_LIBRARIES} ${Boost_SYSTEM_LIBRARY} FastMemcpy + replxx ) if (ENABLE_TESTS) diff --git a/base/common/argsToConfig.cpp b/base/common/argsToConfig.cpp index b0ec2900268..e6b65c7bb01 100644 --- a/base/common/argsToConfig.cpp +++ b/base/common/argsToConfig.cpp @@ -1,4 +1,4 @@ -#include +#include "argsToConfig.h" #include #include diff --git a/base/common/argsToConfig.h b/base/common/argsToConfig.h index 1c1607bc4c5..134eed64fd2 100644 --- a/base/common/argsToConfig.h +++ b/base/common/argsToConfig.h @@ -1,4 +1,5 @@ #pragma once + #include namespace Poco::Util diff --git a/base/common/config_common.h.in b/base/common/config_common.h.in index 41999bb5cde..514cc27d67c 100644 --- a/base/common/config_common.h.in +++ b/base/common/config_common.h.in @@ -4,4 +4,3 @@ #cmakedefine01 USE_JEMALLOC #cmakedefine01 UNBUNDLED -#cmakedefine01 WITH_COVERAGE diff --git a/base/common/coverage.cpp b/base/common/coverage.cpp index d8d3b71edd1..9f3c5ca653a 100644 --- a/base/common/coverage.cpp +++ b/base/common/coverage.cpp @@ -1,16 +1,17 @@ -#include -#include +#include "coverage.h" #if WITH_COVERAGE -#include -#include +# include -#if defined(__clang__) +# include + + +# if defined(__clang__) extern "C" void __llvm_profile_dump(); -#elif defined(__GNUC__) || defined(__GNUG__) +# elif defined(__GNUC__) || defined(__GNUG__) extern "C" void __gcov_exit(); -#endif +# endif #endif @@ -21,11 +22,11 @@ void dumpCoverageReportIfPossible() static std::mutex mutex; std::lock_guard lock(mutex); -#if defined(__clang__) +# if defined(__clang__) __llvm_profile_dump(); -#elif defined(__GNUC__) || defined(__GNUG__) +# elif defined(__GNUC__) || defined(__GNUG__) __gcov_exit(); -#endif +# endif #endif } diff --git a/dbms/src/Common/getFQDNOrHostName.cpp b/base/common/getFQDNOrHostName.cpp similarity index 91% rename from dbms/src/Common/getFQDNOrHostName.cpp rename to base/common/getFQDNOrHostName.cpp index 08ec015919e..f67b37bd71c 100644 --- a/dbms/src/Common/getFQDNOrHostName.cpp +++ b/base/common/getFQDNOrHostName.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace diff --git a/dbms/src/Common/getFQDNOrHostName.h b/base/common/getFQDNOrHostName.h similarity index 100% rename from dbms/src/Common/getFQDNOrHostName.h rename to base/common/getFQDNOrHostName.h diff --git a/base/common/ya.make b/base/common/ya.make new file mode 100644 index 00000000000..a41c8f4c583 --- /dev/null +++ b/base/common/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + contrib/libs/poco/Util +) + +SRCS( + argsToConfig.cpp + coverage.cpp +) + +END() diff --git a/base/daemon/GraphiteWriter.cpp b/base/daemon/GraphiteWriter.cpp index eeb6b4c1705..f28019dec01 100644 --- a/base/daemon/GraphiteWriter.cpp +++ b/base/daemon/GraphiteWriter.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/base/ya.make b/base/ya.make index 8b137891791..25ab5886b2c 100644 --- a/base/ya.make +++ b/base/ya.make @@ -1 +1,3 @@ - +RECURSE( + common +) diff --git a/dbms/programs/copier/Internals.h b/dbms/programs/copier/Internals.h index 59184ec8c0d..c47f42417d3 100644 --- 
a/dbms/programs/copier/Internals.h +++ b/dbms/programs/copier/Internals.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index 0bb4f3fdb6f..a49d13caa73 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 0b0f73fcdc5..0d447a56740 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/programs/server/MySQLHandler.h b/dbms/programs/server/MySQLHandler.h index cbd9233651a..ca5d045beb0 100644 --- a/dbms/programs/server/MySQLHandler.h +++ b/dbms/programs/server/MySQLHandler.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include #include "IServer.h" diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 81c2de8ce3a..be0f19e04c2 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -27,7 +27,7 @@ #include #include #include "config_core.h" -#include +#include #include #include #include diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index e3783ac282a..cac9c8dd2fe 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp index 03387c32099..52bd73c9de4 100644 --- a/dbms/src/Client/ConnectionPoolWithFailover.cpp +++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Functions/FunctionFQDN.cpp b/dbms/src/Functions/FunctionFQDN.cpp index ed49b43632e..12be3508e40 100644 --- a/dbms/src/Functions/FunctionFQDN.cpp +++ b/dbms/src/Functions/FunctionFQDN.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/src/Interpreters/ClientInfo.cpp b/dbms/src/Interpreters/ClientInfo.cpp index 743fe934dc3..ed806e5ad57 100644 --- a/dbms/src/Interpreters/ClientInfo.cpp +++ b/dbms/src/Interpreters/ClientInfo.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index a10b953e644..6f3c0ba80e7 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include From e8eb18974d0bacf1111067fbe5e4c228547c554c Mon Sep 17 00:00:00 2001 From: "imgbot[bot]" <31301654+imgbot[bot]@users.noreply.github.com> Date: Wed, 18 Mar 2020 21:55:54 +0300 Subject: [PATCH 077/115] [ImgBot] Optimize images (#9737) /website/images/clickhouse-black.svg -- 4.33kb -> 4.33kb (0.02%) Signed-off-by: ImgBotApp Co-authored-by: ImgBotApp --- website/images/clickhouse-black.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index 695d0175685..a0a607dc0b2 100644 --- a/website/images/clickhouse-black.svg +++ 
b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse +ClickHouse \ No newline at end of file From 26fcc0f0f8f47ff6262ba332dd057140dffe2fb1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Mar 2020 22:06:15 +0300 Subject: [PATCH 078/115] Update report.py --- docker/test/performance-comparison/report.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index df28251f015..f48adbb841d 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -274,6 +274,7 @@ if unstable_queries: error_tests += slow_average_tests if error_tests: + status = 'failure' message_array.append(str(error_tests) + ' errors') if message_array: From a7956013f3f8928a765acf7d3ced8829f978ed5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 22:17:35 +0300 Subject: [PATCH 079/115] Fixed clang-tidy check --- dbms/src/Functions/CRC.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/CRC.cpp b/dbms/src/Functions/CRC.cpp index b4cb064dd8a..e5730e6610f 100644 --- a/dbms/src/Functions/CRC.cpp +++ b/dbms/src/Functions/CRC.cpp @@ -17,9 +17,7 @@ struct CRCBase { T c = i; for (size_t j = 0; j < 8; ++j) - { c = c & 1 ? polynomial ^ (c >> 1) : c >> 1; - } tab[i] = c; } } @@ -34,13 +32,9 @@ struct CRCImpl { static CRCBase base(polynomial); - T i, crc; - - crc = 0; - for (i = 0; i < size; i++) - { + T crc = 0; + for (size_t i = 0; i < size; i++) crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8); - } return crc; } }; @@ -63,10 +57,12 @@ struct CRC32ZLIBImpl static constexpr auto name = "CRC32"; static UInt32 make_crc(const unsigned char *buf, size_t size) - { return crc32_z(0L, buf, size); } + { + return crc32_z(0L, buf, size); + } }; -} // \anonymous +} namespace DB { From 9dc62d1920c0d8464aec2893ea0fc231b65aeeb1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Mar 2020 22:17:35 +0300 Subject: [PATCH 080/115] Fixed clang-tidy check --- dbms/src/Functions/CRC.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/CRC.cpp b/dbms/src/Functions/CRC.cpp index 22814b2c26c..c754880e5ca 100644 --- a/dbms/src/Functions/CRC.cpp +++ b/dbms/src/Functions/CRC.cpp @@ -17,9 +17,7 @@ struct CRCBase { T c = i; for (size_t j = 0; j < 8; ++j) - { c = c & 1 ? 
polynomial ^ (c >> 1) : c >> 1; - } tab[i] = c; } } @@ -34,13 +32,9 @@ struct CRCImpl { static CRCBase base(polynomial); - T i, crc; - - crc = 0; - for (i = 0; i < size; i++) - { + T crc = 0; + for (size_t i = 0; i < size; i++) crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8); - } return crc; } }; @@ -63,10 +57,12 @@ struct CRC32ZLIBImpl static constexpr auto name = "CRC32"; static UInt32 make_crc(const unsigned char *buf, size_t size) - { return crc32_z(0L, buf, size); } + { + return crc32_z(0L, buf, size); + } }; -} // \anonymous +} namespace DB { From d060b7111dd2f017031791b15b4281cc08914c23 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 Mar 2020 23:23:48 +0300 Subject: [PATCH 081/115] Also unescape backslashes --- dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 3 ++- dbms/tests/integration/test_dictionaries_ddl/test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 0eb734b18fa..daa3b1a272d 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -39,8 +39,9 @@ String getUnescapedFieldString(const Field & field) if (!string.empty() && string.front() == '\'' && string.back() == '\'') string = string.substr(1, string.size() - 2); - /// Backqouting will be performed on dictionary providers side + /// Escaping will be performed on dictionary providers side boost::replace_all(string, "\\'", "'"); + boost::replace_all(string, "\\\\", "\\"); return string; } diff --git a/dbms/tests/integration/test_dictionaries_ddl/test.py b/dbms/tests/integration/test_dictionaries_ddl/test.py index 5f1430a786c..c5df6e68dd2 100644 --- a/dbms/tests/integration/test_dictionaries_ddl/test.py +++ b/dbms/tests/integration/test_dictionaries_ddl/test.py @@ -234,7 +234,7 @@ def test_dictionary_with_where(started_cluster): DB 'clickhouse' TABLE 'special_table' REPLICA(PRIORITY 1 HOST 'mysql1' PORT 3306) - WHERE 'value1 = \\'qweqwe\\'' + WHERE 'value1 = \\'qweqwe\\' OR value1 = \\'\\\\u3232\\'' )) LAYOUT(FLAT()) LIFETIME(MIN 1 MAX 3) From c0f7e5c908b347afbf9d98ea60580fb74817fd07 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Mar 2020 01:22:04 +0300 Subject: [PATCH 082/115] Revert "Removed always built target" This reverts commit 5983cf03a6d6d70867edaea54e05318a0e424962. 
--- contrib/avro-cmake/CMakeLists.txt | 10 ++++++++-- contrib/avro-cmake/include/avro | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) delete mode 120000 contrib/avro-cmake/include/avro diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index a4154a331b7..f544b3c50cd 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ set(AVROCPP_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++) -set(AVROCPP_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/contrib/avro-cmake/include) +set(AVROCPP_INCLUDE_DIR ${AVROCPP_ROOT_DIR}/api) set(AVROCPP_SOURCE_DIR ${AVROCPP_ROOT_DIR}/impl) set (CMAKE_CXX_STANDARD 17) @@ -44,7 +44,6 @@ add_library (avrocpp ${AVROCPP_SOURCE_FILES}) set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}) target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) -target_include_directories(avrocpp SYSTEM PRIVATE ${AVROCPP_ROOT_DIR}/api) target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY}) @@ -62,3 +61,10 @@ elseif (COMPILER_CLANG) endif () target_compile_options(avrocpp PRIVATE ${SUPPRESS_WARNINGS}) + +# create a symlink to include headers with +ADD_CUSTOM_TARGET(avro_symlink_headers ALL + COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include + COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro +) +add_dependencies(avrocpp avro_symlink_headers) \ No newline at end of file diff --git a/contrib/avro-cmake/include/avro b/contrib/avro-cmake/include/avro deleted file mode 120000 index 4d02fd92e3f..00000000000 --- a/contrib/avro-cmake/include/avro +++ /dev/null @@ -1 +0,0 @@ -../../avro/lang/c++/api \ No newline at end of file From 08f9413e64d555435f715bc3eafac5a6d182f1a0 Mon Sep 17 00:00:00 2001 From: hcz Date: Thu, 19 Mar 2020 10:26:40 +0800 Subject: [PATCH 083/115] Extend splitByString for empty separators --- dbms/src/Functions/FunctionsStringArray.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringArray.h b/dbms/src/Functions/FunctionsStringArray.h index b206f07522e..fef59f3dfd6 100644 --- a/dbms/src/Functions/FunctionsStringArray.h +++ b/dbms/src/Functions/FunctionsStringArray.h @@ -214,9 +214,6 @@ public: ErrorCodes::ILLEGAL_COLUMN); sep = col->getValue(); - - if (sep.empty()) - throw Exception("Illegal separator for function " + getName() + ". 
Must be not empty.", ErrorCodes::BAD_ARGUMENTS); } /// Returns the position of the argument that is the column of strings @@ -239,15 +236,27 @@ public: return false; token_begin = pos; - pos = reinterpret_cast(memmem(pos, end - pos, sep.data(), sep.size())); - if (pos) + if (sep.empty()) { + pos += 1; token_end = pos; - pos += sep.size(); + + if (pos == end) + pos = nullptr; } else - token_end = end; + { + pos = reinterpret_cast(memmem(pos, end - pos, sep.data(), sep.size())); + + if (pos) + { + token_end = pos; + pos += sep.size(); + } + else + token_end = end; + } return true; } From c115757e7984d7a249ba496489bc8a44533613b5 Mon Sep 17 00:00:00 2001 From: hcz Date: Thu, 19 Mar 2020 10:35:18 +0800 Subject: [PATCH 084/115] Update tests and docs for string splitting functions --- .../01100_split_by_string.reference | 7 ++++ .../0_stateless/01100_split_by_string.sql | 6 ++++ .../functions/splitting_merging_functions.md | 33 ++++++++++++++++++- 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/01100_split_by_string.reference create mode 100644 dbms/tests/queries/0_stateless/01100_split_by_string.sql diff --git a/dbms/tests/queries/0_stateless/01100_split_by_string.reference b/dbms/tests/queries/0_stateless/01100_split_by_string.reference new file mode 100644 index 00000000000..802ad95b1d6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01100_split_by_string.reference @@ -0,0 +1,7 @@ +['cde','cde'] +['','cde','cde',''] +['','','',''] +['',''] +['a','b','c','d','e'] +['hello','world'] +['gbye','bug'] diff --git a/dbms/tests/queries/0_stateless/01100_split_by_string.sql b/dbms/tests/queries/0_stateless/01100_split_by_string.sql new file mode 100644 index 00000000000..c65c55902ea --- /dev/null +++ b/dbms/tests/queries/0_stateless/01100_split_by_string.sql @@ -0,0 +1,6 @@ +select splitByString('ab', 'cdeabcde'); +select splitByString('ab', 'abcdeabcdeab'); +select splitByString('ab', 'ababab'); +select splitByString('ababab', 'ababab'); +select splitByString('', 'abcde'); +select splitByString(', ', x) from (select arrayJoin(['hello, world', 'gbye, bug']) x); diff --git a/docs/en/query_language/functions/splitting_merging_functions.md b/docs/en/query_language/functions/splitting_merging_functions.md index 514c2165376..5743fd6dc6d 100644 --- a/docs/en/query_language/functions/splitting_merging_functions.md +++ b/docs/en/query_language/functions/splitting_merging_functions.md @@ -5,9 +5,40 @@ Splits a string into substrings separated by 'separator'.'separator' must be a string constant consisting of exactly one character. Returns an array of selected substrings. Empty substrings may be selected if the separator occurs at the beginning or end of the string, or if there are multiple consecutive separators. +**Example:** + +```sql +SELECT splitByChar(',', '1,2,3,abcde') +``` +```text +┌─splitByChar(',', '1,2,3,abcde')─┐ +│ ['1','2','3','abcde'] │ +└─────────────────────────────────┘ +``` + ## splitByString(separator, s) -The same as above, but it uses a string of multiple characters as the separator. The string must be non-empty. +The same as above, but it uses a string of multiple characters as the separator. If the string is empty, it will split the string into an array of single characters. 
+ +**Example:** + +```sql +SELECT splitByString(', ', '1, 2 3, 4,5, abcde') +``` +```text +┌─splitByString(', ', '1, 2 3, 4,5, abcde')─┐ +│ ['1','2 3','4,5','abcde'] │ +└───────────────────────────────────────────┘ +``` + +```sql +SELECT splitByString('', 'abcde') +``` +```text +┌─splitByString('', 'abcde')─┐ +│ ['a','b','c','d','e'] │ +└────────────────────────────┘ +``` ## arrayStringConcat(arr\[, separator\]) From a7f3c6782599b0b8eeb04b4ad9384bc44a1e93a1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 19 Mar 2020 08:48:33 +0300 Subject: [PATCH 085/115] Add a bunch of markdown extensions (#9738) --- docs/en/operations/performance_test.md | 34 ++++++++++---------------- docs/tools/build.py | 5 ++++ docs/tools/requirements.txt | 1 + website/images/clickhouse-black.svg | 2 +- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/docs/en/operations/performance_test.md b/docs/en/operations/performance_test.md index dfdabe08395..db821233d17 100644 --- a/docs/en/operations/performance_test.md +++ b/docs/en/operations/performance_test.md @@ -2,13 +2,13 @@ With this instruction you can run basic ClickHouse performance test on any server without installation of ClickHouse packages. -\1. Go to "commits" page: [https://github.com/ClickHouse/ClickHouse/commits/master](https://github.com/ClickHouse/ClickHouse/commits/master) +1. Go to "commits" page: https://github.com/ClickHouse/ClickHouse/commits/master -\2. Click on the first green check mark or red cross with green "ClickHouse Build Check" and click on the "Details" link near "ClickHouse Build Check". +2. Click on the first green check mark or red cross with green "ClickHouse Build Check" and click on the "Details" link near "ClickHouse Build Check". -\3. Copy the link to "clickhouse" binary for amd64 or aarch64. +3. Copy the link to "clickhouse" binary for amd64 or aarch64. -\4. ssh to the server and download it with wget: +4. ssh to the server and download it with wget: ``` # For amd64: wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse @@ -18,7 +18,7 @@ wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f7907 chmod a+x clickhouse ``` -\5. Download configs: +5. Download configs: ``` wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/programs/server/config.xml wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/programs/server/users.xml @@ -27,20 +27,14 @@ wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/program wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml ``` -\6. Download benchmark files: +6. Download benchmark files: ``` wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/benchmark/clickhouse/benchmark-new.sh chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/benchmark/clickhouse/queries.sql ``` -\7. Download test data: - -According to the instruction: - -[https://clickhouse.tech/docs/en/getting_started/example_datasets/metrica/](https://clickhouse.yandex/docs/en/getting_started/example_datasets/metrica/) - -("hits" table containing 100 million rows) +7. Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction ("hits" table containing 100 million rows). 
``` wget https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz @@ -48,29 +42,27 @@ tar xvf hits_100m_obfuscated_v1.tar.xz -C . mv hits_100m_obfuscated_v1/* . ``` -\8. Run the server: +8. Run the server: ``` ./clickhouse server ``` -\9. Check the data: - -ssh to the server in another terminal +9. Check the data: ssh to the server in another terminal ``` ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated" 100000000 ``` -\10. Edit the benchmark-new.sh, change "clickhouse-client" to "./clickhouse client" and add "--max_memory_usage 100000000000" parameter. +10. Edit the benchmark-new.sh, change "clickhouse-client" to "./clickhouse client" and add "--max_memory_usage 100000000000" parameter. ``` mcedit benchmark-new.sh ``` -\11. Run the benchmark: +11. Run the benchmark: ``` ./benchmark-new.sh hits_100m_obfuscated ``` -\12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com +12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com -All the results are published here: [https://clickhouse.tech/benchmark_hardware.html](https://clickhouse.yandex/benchmark_hardware.html) +All the results are published here: https://clickhouse.tech/benchmark_hardware.html diff --git a/docs/tools/build.py b/docs/tools/build.py index fdef579f41b..31a3b8cc1c2 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -21,6 +21,7 @@ from mkdocs.commands import build as mkdocs_build from concatenate import concatenate from website import build_website, minify_website + import mdx_clickhouse import test import util @@ -109,6 +110,10 @@ def build_for_lang(lang, args): 'admonition', 'attr_list', 'codehilite', + 'nl2br', + 'sane_lists', + 'pymdownx.magiclink', + 'pymdownx.superfences', 'extra', { 'toc': { diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 6b0f39558a6..00e600c0510 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -23,6 +23,7 @@ nose==1.3.7 numpy==1.15.4 protobuf==3.6.1 Pygments==2.5.2 +pymdown-extensions==6.3 python-slugify==1.2.6 PyYAML==5.3 repackage==0.7.3 diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index a0a607dc0b2..695d0175685 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse \ No newline at end of file +ClickHouse From 8fd0cef4634f3e5f160068d7e0678ff191bb0777 Mon Sep 17 00:00:00 2001 From: "imgbot[bot]" <31301654+imgbot[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2020 09:07:48 +0300 Subject: [PATCH 086/115] [ImgBot] Optimize images (#9743) /website/images/clickhouse-black.svg -- 4.33kb -> 4.33kb (0.02%) Signed-off-by: ImgBotApp Co-authored-by: ImgBotApp --- website/images/clickhouse-black.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/images/clickhouse-black.svg b/website/images/clickhouse-black.svg index 695d0175685..a0a607dc0b2 100644 --- a/website/images/clickhouse-black.svg +++ b/website/images/clickhouse-black.svg @@ -1 +1 @@ -ClickHouse +ClickHouse \ No newline at end of file From ee65e63c33400ebd8d9f01c931334ce9a1f8d0a9 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 19 Mar 2020 09:28:58 +0300 Subject: [PATCH 087/115] Improve docs build logging (#9744) --- docs/tools/build.py | 17 +++++++++-------- docs/tools/github.py | 4 +++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index 
31a3b8cc1c2..6ee7dae83e3 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -191,7 +191,7 @@ def build_single_page_version(lang, args, cfg): create_pdf_command = ['wkhtmltopdf', '--print-media-type', single_page_index_html, single_page_pdf] logging.debug(' '.join(create_pdf_command)) with open(os.devnull, 'w') as devnull: - subprocess.check_call(' '.join(create_pdf_command), shell=True) + subprocess.check_call(' '.join(create_pdf_command), shell=True, stderr=devnull) with util.temp_dir() as test_dir: cfg.load_dict({ @@ -317,6 +317,14 @@ if __name__ == '__main__': arg_parser.add_argument('--verbose', action='store_true') args = arg_parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + stream=sys.stderr + ) + + logging.getLogger('MARKDOWN').setLevel(logging.INFO) + args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') from github import choose_latest_releases, get_events @@ -326,13 +334,6 @@ if __name__ == '__main__': args.rev_url = 'https://github.com/ClickHouse/ClickHouse/commit/%s' % args.rev args.events = get_events(args) - logging.basicConfig( - level=logging.DEBUG if args.verbose else logging.INFO, - stream=sys.stderr - ) - - logging.getLogger('MARKDOWN').setLevel(logging.INFO) - from build import build build(args) diff --git a/docs/tools/github.py b/docs/tools/github.py index 7c4cf3fd741..c6ff7f19350 100644 --- a/docs/tools/github.py +++ b/docs/tools/github.py @@ -12,11 +12,13 @@ import util def choose_latest_releases(): + logging.info('Collecting release candidates') seen = collections.OrderedDict() candidates = [] for page in range(1, 10): url = 'https://api.github.com/repos/ClickHouse/ClickHouse/tags?per_page=100&page=%d' % page candidates += requests.get(url).json() + logging.info('Collected all release candidates') for tag in candidates: if isinstance(tag, dict): @@ -45,7 +47,7 @@ def process_release(args, callback, release): tar = tarfile.open(mode='r:gz', fileobj=buf) with util.temp_dir() as base_dir: tar.extractall(base_dir) - args = copy.deepcopy(args) + args = copy.copy(args) args.version_prefix = name args.is_stable_release = True args.docs_dir = os.path.join(base_dir, os.listdir(base_dir)[0], 'docs') From b2622853e48a4042ee32189ab1cd53648dce7355 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 19 Mar 2020 09:53:47 +0300 Subject: [PATCH 088/115] Docs grammar fixes (#9745) --- docs/en/faq/general.md | 6 +-- .../example_datasets/amplab_benchmark.md | 2 +- .../example_datasets/nyc_taxi.md | 9 ++-- .../example_datasets/ontime.md | 6 +-- docs/en/getting_started/index.md | 2 +- docs/en/getting_started/install.md | 18 ++++---- docs/en/getting_started/tutorial.md | 42 ++++++++++--------- docs/en/guides/apply_catboost_model.md | 10 +++-- docs/en/guides/index.md | 4 +- 9 files changed, 51 insertions(+), 48 deletions(-) diff --git a/docs/en/faq/general.md b/docs/en/faq/general.md index fb753026812..1488e2b98e2 100644 --- a/docs/en/faq/general.md +++ b/docs/en/faq/general.md @@ -2,14 +2,14 @@ ## Why Not Use Something Like MapReduce? -We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses their in-house solution, YT. +We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. 
The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT. These systems aren't appropriate for online queries due to their high latency. In other words, they can't be used as the back-end for a web interface. These types of systems aren't useful for real-time data updates. -Distributed sorting isn't the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is the optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. +Distributed sorting isn't the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks. -Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP in order to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. +Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. ## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings} diff --git a/docs/en/getting_started/example_datasets/amplab_benchmark.md b/docs/en/getting_started/example_datasets/amplab_benchmark.md index 67ac38406ee..15bc48f4bc4 100644 --- a/docs/en/getting_started/example_datasets/amplab_benchmark.md +++ b/docs/en/getting_started/example_datasets/amplab_benchmark.md @@ -2,7 +2,7 @@ See -Sign up for a free account at . You will need a credit card, email and phone number.Get a new access key at +Sign up for a free account at . You will need a credit card, email and phone number. 
Get a new access key at Run the following in the console: diff --git a/docs/en/getting_started/example_datasets/nyc_taxi.md b/docs/en/getting_started/example_datasets/nyc_taxi.md index ab91089b0dd..55ec4fa9fea 100644 --- a/docs/en/getting_started/example_datasets/nyc_taxi.md +++ b/docs/en/getting_started/example_datasets/nyc_taxi.md @@ -272,7 +272,7 @@ SELECT formatReadableSize(sum(bytes)) FROM system.parts WHERE table = 'trips_mer └────────────────────────────────┘ ``` -Among other things, you can run the OPTIMIZE query on MergeTree. But it's not required, since everything will be fine without it. +Among other things, you can run the OPTIMIZE query on MergeTree. But it's not required since everything will be fine without it. ## Download of Prepared Partitions @@ -285,8 +285,7 @@ $ clickhouse-client --query "select count(*) from datasets.trips_mergetree" ``` !!!info - If you will run queries described below, you have to use full table name, - `datasets.trips_mergetree`. + If you will run the queries described below, you have to use the full table name, `datasets.trips_mergetree`. ## Results on Single Server @@ -330,7 +329,7 @@ The following server was used: Two Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, 16 physical kernels total,128 GiB RAM,8x6 TB HD on hardware RAID-5 -Execution time is the best of three runsBut starting from the second run, queries read data from the file system cache. No further caching occurs: the data is read out and processed in each run. +Execution time is the best of three runs. But starting from the second run, queries read data from the file system cache. No further caching occurs: the data is read out and processed in each run. Creating a table on three servers: @@ -363,7 +362,7 @@ Q4: 1.241 seconds. No surprises here, since the queries are scaled linearly. -We also have results from a cluster of 140 servers: +We also have the results from a cluster of 140 servers: Q1: 0.028 sec. Q2: 0.043 sec. diff --git a/docs/en/getting_started/example_datasets/ontime.md b/docs/en/getting_started/example_datasets/ontime.md index e29305bcef8..cce1679537a 100644 --- a/docs/en/getting_started/example_datasets/ontime.md +++ b/docs/en/getting_started/example_datasets/ontime.md @@ -1,4 +1,3 @@ - # OnTime This dataset can be obtained in two ways: @@ -158,8 +157,7 @@ $ clickhouse-client --query "select count(*) from datasets.ontime" ``` !!!info - If you will run queries described below, you have to use full table name, - `datasets.ontime`. + If you will run the queries described below, you have to use the full table name, `datasets.ontime`. ## Queries @@ -195,7 +193,7 @@ GROUP BY DayOfWeek ORDER BY c DESC; ``` -Q3. The number of delays by airport for 2000-2008 +Q3. The number of delays by the airport for 2000-2008 ```sql SELECT Origin, count(*) AS c diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index 7ffdceebdea..d4757fdebf3 100644 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -1,6 +1,6 @@ # Getting Started -If you are new to ClickHouse and want to get a hands-on feeling of it's performance, first of all you need to go through the [installation process](install.md). After that you can: +If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). 
After that you can: * [Go through detailed tutorial](tutorial.md) * [Experiment with example datasets](example_datasets/ontime.md) diff --git a/docs/en/getting_started/install.md b/docs/en/getting_started/install.md index 9bcff1cbeab..8f02dddd3c0 100644 --- a/docs/en/getting_started/install.md +++ b/docs/en/getting_started/install.md @@ -40,15 +40,15 @@ You can also download and install packages manually from here: . +The required version can be downloaded with `curl` or `wget` from repository . After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version: ```bash export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` @@ -93,7 +93,7 @@ tar -xzvf clickhouse-client-$LATEST_VERSION.tgz sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh ``` -For production environments it's recommended to use latest `stable`-version. You can find it's number on github page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. +For production environments, it's recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. ### From Docker Image @@ -116,7 +116,7 @@ You'll need to create a data and metadata folders and `chown` them for the desir /opt/clickhouse/metadata/default/ ``` -On Gentoo you can just use `emerge clickhouse` to install ClickHouse from sources. +On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sources. ## Launch @@ -156,7 +156,7 @@ After launching server, you can use the command-line client to connect to it: $ clickhouse-client ``` -By default it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument. +By default, it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument. The terminal must use UTF-8 encoding. For more information, see the section ["Command-line client"](../interfaces/cli.md). @@ -183,7 +183,7 @@ SELECT 1 **Congratulations, the system works!** -To continue experimenting, you can download one of test data sets or go through [tutorial](https://clickhouse.tech/tutorial.html). +To continue experimenting, you can download one of the test data sets or go through [tutorial](https://clickhouse.tech/tutorial.html). [Original article](https://clickhouse.tech/docs/en/getting_started/install/) diff --git a/docs/en/getting_started/tutorial.md b/docs/en/getting_started/tutorial.md index 4bc6ceb5a6b..a0e5afc6f8e 100644 --- a/docs/en/getting_started/tutorial.md +++ b/docs/en/getting_started/tutorial.md @@ -2,7 +2,7 @@ ## What to Expect from This Tutorial? -By going through this tutorial you'll learn how to set up basic ClickHouse cluster, it'll be small, but fault tolerant and scalable. We will use one of example datasets to fill it with data and execute some demo queries. +By going through this tutorial you'll learn how to set up basic ClickHouse cluster, it'll be small, but fault-tolerant and scalable. We will use one of the example datasets to fill it with data and execute some demo queries. ## Single Node Setup @@ -25,9 +25,9 @@ What do we have in the packages that got installed: * `clickhouse-common` package contains a ClickHouse executable file. 
* `clickhouse-server` package contains configuration files to run ClickHouse as a server. -Server config files are located in `/etc/clickhouse-server/`. Before going further please notice the `` element in `config.xml`. Path determines the location for data storage, so it should be located on volume with large disk capacity, the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration it's not really handy to directly edit `config.xml` file, considering it might get rewritten on future package updates. Recommended way to override the config elements is to create [files in config.d directory](../operations/configuration_files.md) which serve as "patches" to config.xml. +Server config files are located in `/etc/clickhouse-server/`. Before going further please notice the `` element in `config.xml`. Path determines the location for data storage, so it should be located on volume with large disk capacity, the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration it's not handy to directly edit `config.xml` file, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in config.d directory](../operations/configuration_files.md) which serve as "patches" to config.xml. -As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won't be automatically restarted after updates either. The way you start the server depends on your init system, usually it's: +As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won't be automatically restarted after updates either. The way you start the server depends on your init system, usually, it's: ``` bash sudo service clickhouse-server start @@ -38,7 +38,7 @@ or sudo /etc/init.d/clickhouse-server start ``` -The default location for server logs is `/var/log/clickhouse-server/`. Server will be ready to handle client connections once `Ready for connections` message was logged. +The default location for server logs is `/var/log/clickhouse-server/`. The server will be ready to handle client connections once `Ready for connections` message was logged. Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT "Hello, world!";`. @@ -71,7 +71,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Import Sample Dataset -Now it's time to fill our ClickHouse server with some sample data. In this tutorial we'll use anonymized data of Yandex.Metrica, the first service that run ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example_datasets/metrica.md) and for the sake of the tutorial we'll go with the most realistic one. +Now it's time to fill our ClickHouse server with some sample data. In this tutorial, we'll use anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example_datasets/metrica.md) and for the sake of the tutorial, we'll go with the most realistic one. ### Download and Extract Table Data @@ -96,7 +96,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r 2. 
Table schema, i.e. list of columns and their [data types](../data_types/index.md). 3. [Table engine](../operations/table_engines/index.md) and it's settings, which determines all the details on how queries to this table will be physically executed. -Yandex.Metrica is a web analytics service and sample dataset doesn't cover it's full functionality, so there are only two tables to create: +Yandex.Metrica is a web analytics service and sample dataset doesn't cover its full functionality, so there are only two tables to create: * `hits` is a table with each action done by all users on all websites covered by the service. * `visits` is a table that contains pre-built sessions instead of individual actions. @@ -444,7 +444,7 @@ SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192 ``` -You can execute those queries using interactive mode of `clickhouse-client` (just launch it in terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want. +You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want. As we can see, `hits_v1` uses the [basic MergeTree engine](../operations/table_engines/mergetree.md), while the `visits_v1` uses the [Collapsing](../operations/table_engines/collapsingmergetree.md) variant. @@ -470,7 +470,7 @@ FORMAT TSV max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." ``` -Optionally you can [OPTIMIZE](../query_language/misc/#misc_operations-optimize) the tables after import. Tables that are configured with MergeTree-family engine always do merges of data parts in background to optimize data storage (or at least check if it makes sense). These queries will just force table engine to do storage optimization right now instead of some time later: +Optionally you can [OPTIMIZE](../query_language/misc/#misc_operations-optimize) the tables after import. Tables that are configured with MergeTree-family engine always do merges of data parts in background to optimize data storage (or at least check if it makes sense). These queries will just force the table engine to do storage optimization right now instead of some time later: ``` bash clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL" @@ -517,7 +517,7 @@ ClickHouse cluster is a homogenous cluster. Steps to set up: [Distributed table](../operations/table_engines/distributed.md) is actually a kind of "view" to local tables of ClickHouse cluster. SELECT query from a distributed table will be executed using resources of all cluster's shards. You may specify configs for multiple clusters and create multiple distributed tables providing views to different clusters. -Example config for cluster with three shards, one replica each: +Example config for a cluster with three shards, one replica each: ``` xml @@ -543,7 +543,7 @@ Example config for cluster with three shards, one replica each: ``` -For further demonstration let's create new local table with exactly the same `CREATE TABLE` query that we used for `hits_v1`, but different table name: +For further demonstration let's create a new local table with the same `CREATE TABLE` query that we used for `hits_v1`, but different table name: ``` sql CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ... 
``` @@ -554,9 +554,9 @@ CREATE TABLE tutorial.hits_all AS tutorial.hits_local ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); ``` -Common practice is to create similar Distributed tables on all machines of the cluster. This would allow to run distributed queries on any machine of the cluster. Also there's an alternative option to create temporary distributed table for a given SELECT query using [remote](../query_language/table_functions/remote.md) table function. +A common practice is to create similar Distributed tables on all machines of the cluster. This would allow running distributed queries on any machine of the cluster. Also there's an alternative option to create temporary distributed table for a given SELECT query using [remote](../query_language/table_functions/remote.md) table function. -Let's run [INSERT SELECT](../query_language/insert_into.md) into Distributed table to spread the table to multiple servers. +Let's run [INSERT SELECT](../query_language/insert_into.md) into the Distributed table to spread the table to multiple servers. ``` sql INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; @@ -567,11 +567,11 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; As you could expect computationally heavy queries are executed N times faster being launched on 3 servers instead of one. -In this case we have used a cluster with 3 shards each contains a single replica. +In this case, we have used a cluster with 3 shards each contains a single replica. -To provide resilience in production environment we recommend that each shard should contain 2-3 replicas distributed between multiple data-centers. Note that ClickHouse supports unlimited number of replicas. +To provide resilience in a production environment we recommend that each shard should contain 2-3 replicas distributed between multiple datacenters. Note that ClickHouse supports an unlimited number of replicas. -Example config for cluster of one shard containing three replicas: +Example config for a cluster of one shard containing three replicas: ``` xml ... @@ -597,10 +597,10 @@ Example config for cluster of one shard containing three replicas: To enable native replication
    ZooKeeper is required. ClickHouse will take care of data consistency on all replicas and run restore procedure after failure automatically. It's recommended to deploy ZooKeeper cluster to separate servers. -ZooKeeper is not a strict requirement: in some simple cases you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case ClickHouse won't be able to +ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won't be able to guarantee data consistency on all replicas. This remains the responsibility of your application. -ZooKeeper locations need to be specified in configuration file: +ZooKeeper locations need to be specified in the configuration file: ``` xml @@ -618,7 +618,7 @@ ZooKeeper locations need to be specified in configuration file: ``` -Also we need to set macros for identifying each shard and replica, it will be used on table creation: +Also, we need to set macros for identifying each shard and replica, it will be used on table creation: ``` xml 01 @@ -626,7 +626,7 @@ Also we need to set macros for identifying each shard and replica, it will be us ``` -If there are no replicas at the moment on replicated table creation, a new first replica will be instantiated. If there are already live replicas, new replica will clone the data from existing ones. You have an option to create all replicated tables first and that insert data to it. Another option is to create some replicas and add the others after or during data insertion. +If there are no replicas at the moment on replicated table creation, a new first replica will be instantiated. If there are already live replicas, the new replica will clone the data from existing ones. You have an option to create all replicated tables first and that insert data to it. Another option is to create some replicas and add the others after or during data insertion. ``` sql CREATE TABLE tutorial.hits_replica (...) @@ -642,4 +642,6 @@ Here we use [ReplicatedMergeTree](../operations/table_engines/replication.md) ta ``` sql INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; ``` -Replication operates in multi-master mode. Data can be loaded into any replica and it will be synced with other instances automatically. Replication is asynchronous so at a given moment of time not all replicas may contain recently inserted data. To allow data insertion at least one replica should be up. Others will sync up data and repair consistency once they will become active again. Please notice that such approach allows for the low possibility of loss of just appended data. +Replication operates in multi-master mode. Data can be loaded into any replica and it will be synced with other instances automatically. Replication is asynchronous so at a given moment, not all replicas may contain recently inserted data. To allow data insertion at least one replica should be up. Others will sync up data and repair consistency once they will become active again. Please notice that such an approach allows for the low possibility of a loss of just appended data. 
+ +[Original article](https://clickhouse.tech/docs/en/getting_started/tutorial/) diff --git a/docs/en/guides/apply_catboost_model.md b/docs/en/guides/apply_catboost_model.md index a9d8707f5ca..d8fd5a51e2d 100644 --- a/docs/en/guides/apply_catboost_model.md +++ b/docs/en/guides/apply_catboost_model.md @@ -46,9 +46,9 @@ $ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse ## 1. Create a Table {#create-table} -To create a ClickHouse table for the train sample: +To create a ClickHouse table for the training sample: -**1.** Start ClickHouse console client in interactive mode: +**1.** Start ClickHouse console client in the interactive mode: ```bash $ clickhouse client @@ -93,7 +93,7 @@ To insert the data: $ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv ``` -**2.** Start ClickHouse console client in interactive mode: +**2.** Start ClickHouse console client in the interactive mode: ```bash $ clickhouse client @@ -180,7 +180,7 @@ LIMIT 10 !!! note "Note" Function [modelEvaluate](../query_language/functions/other_functions.md#function-modelevaluate) returns tuple with per-class raw predictions for multiclass models. -Let's predict probability: +Let's predict the probability: ```sql :) SELECT @@ -228,3 +228,5 @@ FROM !!! note "Note" More info about [avg()](../query_language/agg_functions/reference.md#agg_function-avg) and [log()](../query_language/functions/math_functions.md) functions. + +[Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md index 32c2da1ad2f..0d21dd3147e 100644 --- a/docs/en/guides/index.md +++ b/docs/en/guides/index.md @@ -2,4 +2,6 @@ Detailed step-by-step instructions that will help you solve various tasks using ClickHouse. -- [Applying a CatBoost Model in ClickHouse](apply_catboost_model.md) \ No newline at end of file +- [Applying a CatBoost Model in ClickHouse](apply_catboost_model.md) + +[Original article](https://clickhouse.tech/docs/en/guides/) From 690d6606e1e0b154fac867c1cd3c7c441dc06770 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2020 11:03:38 +0300 Subject: [PATCH 089/115] Bump soupsieve from 1.9.5 to 2.0 in /docs/tools (#9747) Bumps [soupsieve](https://github.com/facelessuser/soupsieve) from 1.9.5 to 2.0. - [Release notes](https://github.com/facelessuser/soupsieve/releases) - [Commits](https://github.com/facelessuser/soupsieve/compare/1.9.5...2.0.0) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 00e600c0510..8eb6267bbd7 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -30,7 +30,7 @@ repackage==0.7.3 requests==2.23.0 singledispatch==3.4.0.3 six==1.14.0 -soupsieve==1.9.5 +soupsieve==2.0 termcolor==1.1.0 tornado==5.1.1 Unidecode==1.1.1 From 233a7de21a74751b30f7e009c5b01a3a2f4371e7 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2020 11:24:23 +0300 Subject: [PATCH 090/115] Bump numpy from 1.15.4 to 1.18.2 in /docs/tools (#9749) Bumps [numpy](https://github.com/numpy/numpy) from 1.15.4 to 1.18.2. 
- [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/master/doc/HOWTO_RELEASE.rst.txt) - [Commits](https://github.com/numpy/numpy/compare/v1.15.4...v1.18.2) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 8eb6267bbd7..9cf4491a474 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -20,7 +20,7 @@ mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.4 nltk==3.4.5 nose==1.3.7 -numpy==1.15.4 +numpy==1.18.2 protobuf==3.6.1 Pygments==2.5.2 pymdown-extensions==6.3 From edee1c6a9fdfed841ab8080e38fb542f025080e0 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2020 11:25:09 +0300 Subject: [PATCH 091/115] Bump protobuf from 3.6.1 to 3.11.3 in /docs/tools (#9748) Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 3.6.1 to 3.11.3. - [Release notes](https://github.com/protocolbuffers/protobuf/releases) - [Changelog](https://github.com/protocolbuffers/protobuf/blob/master/generate_changelog.py) - [Commits](https://github.com/protocolbuffers/protobuf/compare/v3.6.1...v3.11.3) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> Co-authored-by: Ivan Blinkov --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 9cf4491a474..91ea34b0223 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -20,8 +20,8 @@ mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.4 nltk==3.4.5 nose==1.3.7 +protobuf==3.11.3 numpy==1.18.2 -protobuf==3.6.1 Pygments==2.5.2 pymdown-extensions==6.3 python-slugify==1.2.6 From 0db2d878efffeac219fde51c2bcbae2e727f352e Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2020 11:25:26 +0300 Subject: [PATCH 092/115] Bump pyyaml from 5.3 to 5.3.1 in /docs/tools (#9746) Bumps [pyyaml](https://github.com/yaml/pyyaml) from 5.3 to 5.3.1. 
- [Release notes](https://github.com/yaml/pyyaml/releases) - [Changelog](https://github.com/yaml/pyyaml/blob/master/CHANGES) - [Commits](https://github.com/yaml/pyyaml/compare/5.3...5.3.1) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 91ea34b0223..a86e15cec19 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -25,7 +25,7 @@ numpy==1.18.2 Pygments==2.5.2 pymdown-extensions==6.3 python-slugify==1.2.6 -PyYAML==5.3 +PyYAML==5.3.1 repackage==0.7.3 requests==2.23.0 singledispatch==3.4.0.3 From a764545120388dc5f0d23768065fbdec725d57e1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 19 Mar 2020 11:31:06 +0300 Subject: [PATCH 093/115] Docs build and grammar fixes (#9750) --- docs/en/interfaces/cli.md | 4 +- docs/en/interfaces/formats.md | 62 +++++++------- docs/en/interfaces/http.md | 14 ++-- docs/en/interfaces/mysql.md | 6 +- docs/en/interfaces/tcp.md | 2 +- docs/en/interfaces/third-party/gui.md | 18 ++-- docs/en/interfaces/third-party/proxy.md | 4 +- docs/en/introduction/adopters.md | 2 +- .../features_considered_disadvantages.md | 2 +- docs/en/introduction/history.md | 12 +-- .../performance/sampling_query_profiler.md | 10 +-- .../en/operations/server_settings/settings.md | 37 ++++---- .../settings/constraints_on_settings.md | 8 +- .../settings/permissions_for_queries.md | 12 +-- .../operations/settings/query_complexity.md | 44 +++++----- docs/en/operations/settings/settings.md | 84 +++++++++---------- .../operations/settings/settings_profiles.md | 3 +- docs/en/operations/settings/settings_users.md | 2 +- docs/tools/build.py | 5 +- docs/tools/github.py | 7 +- 20 files changed, 173 insertions(+), 165 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 48965d11062..9702d2f2bbd 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -40,7 +40,7 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query. -By default, you can only process a single query in batch mode. To make multiple queries from a "script," use the --multiquery parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. +By default, you can only process a single query in batch mode. To make multiple queries from a "script," use the `--multiquery` parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run 'clickhouse-client' for each query. Note that it may take tens of milliseconds to launch the 'clickhouse-client' program. In interactive mode, you get a command line where you can enter queries. @@ -67,7 +67,7 @@ When processing a query, the client shows: 3. The result in the specified format. 4. The number of lines in the result, the time passed, and the average speed of query processing. -You can cancel a long query by pressing Ctrl+C. However, you will still need to wait a little for the server to abort the request. It is not possible to cancel a query at certain stages. If you don't wait and press Ctrl+C a second time, the client will exit. 
+You can cancel a long query by pressing Ctrl+C. However, you will still need to wait for a little for the server to abort the request. It is not possible to cancel a query at certain stages. If you don't wait and press Ctrl+C a second time, the client will exit. The command-line client allows passing external data (external temporary tables) for querying. For more information, see the section "External data for query processing". diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a6deb4ccb02..c3e7d75d4fc 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -44,7 +44,7 @@ You can control some format processing parameters with the ClickHouse settings. ## TabSeparated {#tabseparated} -In TabSeparated format, data is written by row. Each row contains values separated by tabs. Each value is follow by a tab, except the last value in the row, which is followed by a line feed. Strictly Unix line feeds are assumed everywhere. The last row also must contain a line feed at the end. Values are written in text format, without enclosing quotation marks, and with special characters escaped. +In TabSeparated format, data is written by row. Each row contains values separated by tabs. Each value is followed by a tab, except the last value in the row, which is followed by a line feed. Strictly Unix line feeds are assumed everywhere. The last row also must contain a line feed at the end. Values are written in text format, without enclosing quotation marks, and with special characters escaped. This format is also available under the name `TSV`. @@ -80,13 +80,13 @@ During formatting, accuracy may be lost on floating-point numbers. During parsing, it is not strictly required to read the nearest machine-representable number. Dates are written in YYYY-MM-DD format and parsed in the same format, but with any characters as separators. -Dates with times are written in the format YYYY-MM-DD hh:mm:ss and parsed in the same format, but with any characters as separators. -This all occurs in the system time zone at the time the client or server starts (depending on which one formats data). For dates with times, daylight saving time is not specified. So if a dump has times during daylight saving time, the dump does not unequivocally match the data, and parsing will select one of the two times. +Dates with times are written in the format `YYYY-MM-DD hh:mm:ss` and parsed in the same format, but with any characters as separators. +This all occurs in the system time zone at the time the client or server starts (depending on which of them formats data). For dates with times, daylight saving time is not specified. So if a dump has times during daylight saving time, the dump does not unequivocally match the data, and parsing will select one of the two times. During a read operation, incorrect dates and dates with times can be parsed with natural overflow or as null dates and times, without an error message. As an exception, parsing dates with times is also supported in Unix timestamp format, if it consists of exactly 10 decimal digits. The result is not time zone-dependent. The formats YYYY-MM-DD hh:mm:ss and NNNNNNNNNN are differentiated automatically. -Strings are output with backslash-escaped special characters. The following escape sequences are used for output: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\'`, `\\`. 
Parsing also supports the sequences `\a`, `\v`, and `\xHH` (hex escape sequences) and any `\c` sequences, where `c` is any character (these sequences are converted to `c`). Thus, reading data supports formats where a line feed can be written as `\n` or `\`, or as a line feed. For example, the string `Hello world` with a line feed between the words instead of a space can be parsed in any of the following variations: +Strings are output with backslash-escaped special characters. The following escape sequences are used for output: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\'`, `\\`. Parsing also supports the sequences `\a`, `\v`, and `\xHH` (hex escape sequences) and any `\c` sequences, where `c` is any character (these sequences are converted to `c`). Thus, reading data supports formats where a line feed can be written as `\n` or `\`, or as a line feed. For example, the string `Hello world` with a line feed between the words instead of space can be parsed in any of the following variations: ```text Hello\nworld @@ -127,7 +127,7 @@ INSERT INTO nestedt Values ( 1, [1], ['a']) SELECT * FROM nestedt FORMAT TSV ``` ```text -1 [1] ['a'] +1 [1] ['a'] ``` ## TabSeparatedRaw {#tabseparatedraw} @@ -154,7 +154,7 @@ This format is also available under the name `TSVWithNamesAndTypes`. ## Template {#format-template} -This format allows to specify a custom format string with placeholders for values with specified escaping rule. +This format allows specifying a custom format string with placeholders for values with a specified escaping rule. It uses settings `format_template_resultset`, `format_template_row`, `format_template_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) @@ -172,7 +172,7 @@ Setting `format_template_row` specifies path to file, which contains format stri - `Raw` (without escaping, similarly to `TSVRaw`) - `None` (no escaping rule, see further) - If escaping rule is omitted, then`None` will be used. `XML` and `Raw` are suitable only for output. + If an escaping rule is omitted, then `None` will be used. `XML` and `Raw` are suitable only for output. So, for the following format string: @@ -184,21 +184,21 @@ Setting `format_template_row` specifies path to file, which contains format stri The `format_template_rows_between_delimiter` setting specifies delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) -Setting `format_template_resultset` specifies path to file, which contains format string for resultset. Format string for resultset has the same syntax as format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: +Setting `format_template_resultset` specifies the path to file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: - `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string. 
- `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS) - - `min` is the row with minimum values in `format_template_row` format (when extremes is set to 1) - - `max` is the row with maximum values in `format_template_row` format (when extremes is set to 1) + - `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1) + - `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1) - `rows` is the total number of output rows - `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT. - `time` is the request execution time in seconds - - `rows_read` is the number of rows have been read - - `bytes_read` is the number of bytes (uncompressed) have been read + - `rows_read` is the number of rows has been read + - `bytes_read` is the number of bytes (uncompressed) has been read The placeholders `data`, `totals`, `min` and `max` must not have escaping rule specified (or `None` must be specified explicitly). The remaining placeholders may have any escaping rule specified. If the `format_template_resultset` setting is an empty string, `${data}` is used as default value. - For insert queries format allows to skip some columns or some fields if prefix or suffix (see example). + For insert queries format allows skipping some columns or some fields if prefix or suffix (see example). Select example: ```sql @@ -310,7 +310,7 @@ SELECT * FROM t_null FORMAT TSKV ``` ```text -x=1 y=\N +x=1 y=\N ``` When there is a large number of small columns, this format is ineffective, and there is generally no reason to use it. Nevertheless, it is no worse than JSONEachRow in terms of efficiency. @@ -323,7 +323,7 @@ Parsing allows the presence of the additional field `tskv` without the equal sig Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). 
Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ```bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv @@ -422,7 +422,7 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA } ``` -The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) to 0. +The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double-quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) to 0. `rows` – The total number of output rows. @@ -431,7 +431,7 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o `totals` – Total values (when using WITH TOTALS). -`extremes` – Extreme values (when extremes is set to 1). +`extremes` – Extreme values (when extremes are set to 1). This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). @@ -617,12 +617,12 @@ You can use this format to quickly generate dumps that can only be read by the C ## Null {#null} -Nothing is output. However, the query is processed, and when using the command-line client, data is transmitted to the client. This is used for tests, including productivity testing. +Nothing is output. However, the query is processed, and when using the command-line client, data is transmitted to the client. This is used for tests, including performance testing. Obviously, this format is only appropriate for output, not for parsing. ## Pretty {#pretty} -Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting colors in the terminal. +Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting colours in the terminal. 
A full grid of the table is drawn, and each row occupies two lines in the terminal. Each result block is output as a separate table. This is necessary so that blocks can be output without buffering results (buffering would be necessary in order to pre-calculate the visible width of all the values). @@ -648,7 +648,7 @@ SELECT 'String with \'quotes\' and \t character' AS Escaping_test ```text ┌─Escaping_test────────────────────────┐ -│ String with 'quotes' and character │ +│ String with 'quotes' and character │ └──────────────────────────────────────┘ ``` @@ -720,9 +720,9 @@ Differs from [PrettyCompact](#prettycompact) in that whitespace (space character ## RowBinary {#rowbinary} Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. -This format is less efficient than the Native format, since it is row-based. +This format is less efficient than the Native format since it is row-based. -Integers use fixed-length little endian representation. For example, UInt64 uses 8 bytes. +Integers use fixed-length little-endian representation. For example, UInt64 uses 8 bytes. DateTime is represented as UInt32 containing the Unix timestamp as the value. Date is represented as a UInt16 object that contains the number of days since 1970-01-01 as the value. String is represented as a varint length (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)), followed by the bytes of the string. @@ -742,7 +742,7 @@ Similar to [RowBinary](#rowbinary), but with added header: ## Values {#data-format-values} -Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`. +Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`. The minimum set of characters that you need to escape when passing data in Values ​​format: single quotes and backslashes. @@ -752,7 +752,7 @@ See also: [input_format_values_interpret_expressions](../operations/settings/set ## Vertical {#vertical} -Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows, if each row consists of a large number of columns. +Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows if each row consists of a large number of columns. [NULL](../query_language/syntax.md) is output as `ᴺᵁᴸᴸ`. 
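For instance, a quick way to see that rendering (an illustrative sketch):

```sql
SELECT NULL AS x, 1 AS y FORMAT Vertical
```

The `x` value is expected to be printed as `ᴺᵁᴸᴸ`, with each column name on its own line followed by the value.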
@@ -777,7 +777,7 @@ SELECT 'string with \'quotes\' and \t with some special \n characters' AS test F ```text Row 1: ────── -test: string with 'quotes' and with some special +test: string with 'quotes' and with some special characters ``` @@ -997,7 +997,7 @@ The root schema of input Avro file must be of `record` type. To find the correspondence between table columns and fields of Avro schema ClickHouse compares their names. This comparison is case-sensitive. Unused fields are skipped. -Data types of a ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to corresponding column type. +Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to corresponding column type. ### Selecting Data @@ -1092,7 +1092,7 @@ ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query Unsupported Parquet data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Data types of a ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. +Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. ### Inserting and Selecting Data @@ -1155,10 +1155,10 @@ To exchange data with Hadoop, you can use [HDFS table engine](../operations/tabl The file name containing the format schema is set by the setting `format_schema`. It's required to set this setting when it is used one of the formats `Cap'n Proto` and `Protobuf`. -The format schema is a combination of a file name and the name of a message type in this file, delimited by colon, +The format schema is a combination of a file name and the name of a message type in this file, delimited by a colon, e.g. `schemafile.proto:MessageType`. If the file has the standard extension for the format (for example, `.proto` for `Protobuf`), -it can be omitted and in this case the format schema looks like `schemafile:MessageType`. +it can be omitted and in this case, the format schema looks like `schemafile:MessageType`. If you input or output data via the [client](../interfaces/cli.md) in the [interactive mode](../interfaces/cli.md#cli_usage), the file name specified in the format schema can contain an absolute path or a path relative to the current directory on the client. @@ -1168,8 +1168,6 @@ If you input or output data via the [HTTP interface](../interfaces/http.md) the should be located in the directory specified in [format_schema_path](../operations/server_settings/settings.md#server_settings-format_schema_path) in the server configuration. 
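As a rough sketch of how this is used in practice (the file, table and message names below are hypothetical), a Protobuf payload could be loaded through the command-line client like this:

```bash
$ cat messages.bin | clickhouse-client --query="INSERT INTO mydb.mytable FORMAT Protobuf" --format_schema='schemafile:MessageType'
```

Here `schemafile.proto` is assumed to define a `MessageType` message whose fields match the columns of `mydb.mytable`.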
-[Original article](https://clickhouse.tech/docs/en/interfaces/formats/) - ## Skipping Errors {#skippingerrors} Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and @@ -1177,3 +1175,5 @@ Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, ` Limitations: - In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly. - `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty. + +[Original article](https://clickhouse.tech/docs/en/interfaces/formats/) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 0ce700bdc54..05021efaaeb 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -10,7 +10,7 @@ $ curl 'http://localhost:8123/' Ok. ``` -Use GET /ping request in health-check scripts. This handler always return "Ok." (with a line feed at the end). Available from version 18.12.13. +Use GET /ping request in health-check scripts. This handler always returns "Ok." (with a line feed at the end). Available from version 18.12.13. ```bash $ curl 'http://localhost:8123/ping' Ok. @@ -22,7 +22,7 @@ Send the request as a URL 'query' parameter, or as a POST. Or send the beginning If successful, you receive the 200 response code and the result in the response body. If an error occurs, you receive the 500 response code and an error description text in the response body. -When using the GET method, 'readonly' is set. In other words, for queries that modify data, you can only use the POST method. You can send the query itself either in the POST body, or in the URL parameter. +When using the GET method, 'readonly' is set. In other words, for queries that modify data, you can only use the POST method. You can send the query itself either in the POST body or in the URL parameter. Examples: @@ -200,7 +200,7 @@ $ echo 'SELECT 1' | curl -H 'X-ClickHouse-User: user' -H 'X-ClickHouse-Key: pass ``` If the user name is not specified, the `default` name is used. If the password is not specified, the empty password is used. -You can also use the URL parameters to specify any settings for processing a single query, or entire profiles of settings. Example:http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 +You can also use the URL parameters to specify any settings for processing a single query or entire profiles of settings. Example:http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 For more information, see the [Settings](../operations/settings/index.md) section. @@ -238,7 +238,7 @@ Possible header fields: - `written_rows` — Number of rows written. - `written_bytes` — Volume of data written in bytes. -Running requests don't stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server side, and using the network might be ineffective. +Running requests don't stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server-side, and using the network might be ineffective. 
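The progress headers listed above are only sent when they are enabled with the `send_progress_in_http_headers` setting; a hedged sketch with `curl` (assuming a server on the default HTTP port):

```bash
$ curl -vsS 'http://localhost:8123/?send_progress_in_http_headers=1' -d 'SELECT count() FROM system.numbers LIMIT 100000000' 2>&1 | grep X-ClickHouse-Progress
```

Each `X-ClickHouse-Progress` header carries a small JSON object with counters such as the ones described above.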
The optional 'query_id' parameter can be passed as the query ID (any string). For more information, see the section "Settings, replace_running_query". The optional 'quota_key' parameter can be passed as the quota key (any string). For more information, see the section "Quotas". @@ -247,9 +247,9 @@ The HTTP interface allows passing external data (external temporary tables) for ## Response Buffering -You can enable response buffering on the server side. The `buffer_size` and `wait_end_of_query` URL parameters are provided for this purpose. +You can enable response buffering on the server-side. The `buffer_size` and `wait_end_of_query` URL parameters are provided for this purpose. -`buffer_size` determines the number of bytes in the result to buffer in the server memory. If the result body is larger than this threshold, the buffer is written to the HTTP channel, and the remaining data is sent directly to the HTTP channel. +`buffer_size` determines the number of bytes in the result to buffer in the server memory. If a result body is larger than this threshold, the buffer is written to the HTTP channel, and the remaining data is sent directly to the HTTP channel. To ensure that the entire response is buffered, set `wait_end_of_query=1`. In this case, the data that is not stored in memory will be buffered in a temporary server file. @@ -259,7 +259,7 @@ Example: $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wait_end_of_query=1' -d 'SELECT toUInt8(number) FROM system.numbers LIMIT 9000000 FORMAT RowBinary' ``` -Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. +Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client-side, the error can only be detected at the parsing stage. ### Queries with Parameters {#cli-queries-with-parameters} diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 454cdb9160d..d7b0c5194c6 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -5,12 +5,12 @@ ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../op 9004 ``` -Example of connecting using command-line tool mysql: +Example of connecting using command-line tool `mysql`: ```bash $ mysql --protocol tcp -u default -P 9004 ``` -Output if connection succeeded: +Output if a connection succeeded: ```text Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 4 @@ -35,3 +35,5 @@ Restrictions: - prepared queries are not supported - some data types are sent as strings + +[Original article](https://clickhouse.tech/docs/en/interfaces/mysql/) diff --git a/docs/en/interfaces/tcp.md b/docs/en/interfaces/tcp.md index 6194809db82..0b3935b8394 100644 --- a/docs/en/interfaces/tcp.md +++ b/docs/en/interfaces/tcp.md @@ -1,5 +1,5 @@ # Native Interface (TCP) -The native protocol is used in the [command-line client](cli.md), for interserver communication during distributed query processing, and also in other C++ programs. 
Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/dbms/src/Client)) and/or by intercepting and analyzing TCP traffic. +The native protocol is used in the [command-line client](cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/dbms/src/Client)) and/or by intercepting and analyzing TCP traffic. [Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index a01f524f3c7..296c1e02058 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -12,7 +12,7 @@ Features: - Query editor with syntax highlighting. - Auto-completion of commands. - Tools for graphical analysis of query execution. -- Color scheme options. +- Colour scheme options. [Tabix documentation](https://tabix.io/doc/). @@ -26,7 +26,7 @@ Features: - Export query results as CSV or JSON. - List of processes with descriptions. Write mode. Ability to stop (`KILL`) a process. - Database graph. Shows all tables and their columns with additional information. -- Quick view of the column size. +- A quick view of the column size. - Server configuration. The following features are planned for development: @@ -69,11 +69,11 @@ Features: - Query development with syntax highlight and autocompletion. - Table list with filters and metadata search. - Table data preview. -- Full text search. +- Full-text search. ### clickhouse-cli -[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) is an alternative command line client for ClickHouse, written in Python 3. +[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) is an alternative command-line client for ClickHouse, written in Python 3. Features: @@ -90,13 +90,13 @@ Features: ### DataGrip -[DataGrip](https://www.jetbrains.com/datagrip/) is a database IDE from JetBrains with dedicated support for ClickHouse. It is also embedded into other IntelliJ-based tools: PyCharm, IntelliJ IDEA, GoLand, PhpStorm and others. +[DataGrip](https://www.jetbrains.com/datagrip/) is a database IDE from JetBrains with dedicated support for ClickHouse. It is also embedded in other IntelliJ-based tools: PyCharm, IntelliJ IDEA, GoLand, PhpStorm and others. Features: - Very fast code completion. - ClickHouse syntax highlighting. -- Support for features specific to ClickHouse, for example nested columns, table engines. +- Support for features specific to ClickHouse, for example, nested columns, table engines. - Data Editor. - Refactorings. - Search and Navigation. @@ -127,7 +127,7 @@ Features: - SQL editor with visualizations, version control, auto-completion, reusable query components and dynamic filters. - Embedded analytics of reports and dashboards via iframe. - Data preparation and ETL capabilities. -- SQL data modeling support for relational mapping of data. +- SQL data modelling support for relational mapping of data. ### Looker [Looker](https://looker.com) is a data platform and business intelligence tool with support for 50+ database dialects including ClickHouse. Looker is available as a SaaS platform and self-hosted. 
Users can use Looker via the browser to explore data, build visualizations and dashboards, schedule reports, and share their insights with colleagues. Looker provides a rich set of tools to embed these features in other applications, and an API @@ -135,8 +135,8 @@ to integrate data with other applications. Features: -- Easy and agile development using LookML, a language which supports currated -[Data Modeling](https://looker.com/platform/data-modeling) to support report writers and end users. +- Easy and agile development using LookML, a language which supports curated +[Data Modeling](https://looker.com/platform/data-modeling) to support report writers and end-users. - Powerful workflow integration via Looker's [Data Actions](https://looker.com/platform/actions). diff --git a/docs/en/interfaces/third-party/proxy.md b/docs/en/interfaces/third-party/proxy.md index 7bce14748c8..6ac4c7cb1db 100644 --- a/docs/en/interfaces/third-party/proxy.md +++ b/docs/en/interfaces/third-party/proxy.md @@ -2,13 +2,13 @@ ## chproxy -[chproxy](https://github.com/Vertamedia/chproxy), is an http proxy and load balancer for ClickHouse database. +[chproxy](https://github.com/Vertamedia/chproxy), is an HTTP proxy and load balancer for ClickHouse database. Features: * Per-user routing and response caching. * Flexible limits. -* Automatic SSL cerificate renewal. +* Automatic SSL certificate renewal. Implemented in Go. diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 053dbef85b9..2f76ee00973 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -1,7 +1,7 @@ # ClickHouse Adopters !!! warning "Disclaimer" - The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We'd really appreciate if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won't have any NDA issues by doing so. Providing updates with publications by other companies is also useful. + The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We'd really appreciate if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won't have any NDA issues by doing so. Providing updates with publications from other companies is also useful. | Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size* | Reference | | --- | --- | --- | --- | --- | --- | diff --git a/docs/en/introduction/features_considered_disadvantages.md b/docs/en/introduction/features_considered_disadvantages.md index 6717e32bfe4..cd091289a75 100644 --- a/docs/en/introduction/features_considered_disadvantages.md +++ b/docs/en/introduction/features_considered_disadvantages.md @@ -2,6 +2,6 @@ 1. No full-fledged transactions. 2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu). -3. The sparse index makes ClickHouse not really suitable for point queries retrieving single rows by their keys. +3. 
The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys. [Original article](https://clickhouse.tech/docs/en/introduction/features_considered_disadvantages/) diff --git a/docs/en/introduction/history.md b/docs/en/introduction/history.md index 1d35a6df683..7f5a6a491d0 100644 --- a/docs/en/introduction/history.md +++ b/docs/en/introduction/history.md @@ -2,9 +2,9 @@ ClickHouse was originally developed to power [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all), and continues to be the core component of this system. With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows generating custom reports on the fly directly from non-aggregated data. This article briefly covers the goals of ClickHouse in the early stages of its development. -Yandex.Metrica builds customized reports on the fly based on hits and sessions, with arbitrary segments defined by the user. This often requires building complex aggregates, such as the number of unique users. New data for building a report is received in real time. +Yandex.Metrica builds customized reports on the fly based on hits and sessions, with arbitrary segments defined by the user. This often requires building complex aggregates, such as the number of unique users. New data for building a report is received in real-time. -As of April 2014, Yandex.Metrica was tracking about 12 billion events (page views and clicks) daily. All these events must be stored in order to build custom reports. A single query may require scanning millions of rows within a few hundred milliseconds, or hundreds of millions of rows in just a few seconds. +As of April 2014, Yandex.Metrica was tracking about 12 billion events (page views and clicks) daily. All these events must be stored to build custom reports. A single query may require scanning millions of rows within a few hundred milliseconds, or hundreds of millions of rows in just a few seconds. ## Usage in Yandex.Metrica and Other Yandex Services @@ -23,20 +23,20 @@ ClickHouse has at least a dozen installations in other Yandex services: in searc ## Aggregated and Non-aggregated Data -There is a popular opinion that in order to effectively calculate statistics, you must aggregate data, since this reduces the volume of data. +There is a popular opinion that to effectively calculate statistics, you must aggregate data since this reduces the volume of data. But data aggregation is a very limited solution, for the following reasons: - You must have a pre-defined list of reports the user will need. - The user can't make custom reports. -- When aggregating a large quantity of keys, the volume of data is not reduced, and aggregation is useless. +- When aggregating a large number of keys, the volume of data is not reduced, and aggregation is useless. - For a large number of reports, there are too many aggregation variations (combinatorial explosion). - When aggregating keys with high cardinality (such as URLs), the volume of data is not reduced by much (less than twofold). - For this reason, the volume of data with aggregation might grow instead of shrink. -- Users do not view all the reports we generate for them. A large portion of calculations are useless. +- Users do not view all the reports we generate for them. A large portion of those calculations is useless. 
- The logical integrity of data may be violated for various aggregations. -If we do not aggregate anything and work with non-aggregated data, this might actually reduce the volume of calculations. +If we do not aggregate anything and work with non-aggregated data, this might reduce the volume of calculations. However, with aggregation, a significant part of the work is taken offline and completed relatively calmly. In contrast, online calculations require calculating as fast as possible, since the user is waiting for the result. diff --git a/docs/en/operations/performance/sampling_query_profiler.md b/docs/en/operations/performance/sampling_query_profiler.md index 7b453e9386e..ddc7b848fd4 100644 --- a/docs/en/operations/performance/sampling_query_profiler.md +++ b/docs/en/operations/performance/sampling_query_profiler.md @@ -1,25 +1,25 @@ # Sampling Query Profiler -ClickHouse runs sampling profiler that allows to analyze query execution. Using profiler you can find source code routines that used the most frequently during a query execution. You can trace CPU time and wall-clock time spent including idle time. +ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time. To use profiler: - Setup the [trace_log](../server_settings/settings.md#server_settings-trace_log) section of the server configuration. - This section configures the [trace_log](../system_tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for running server. After the server restart, ClickHouse doesn't clean up the table and all the stored virtual memory address may become invalid. + This section configures the [trace_log](../system_tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn't clean up the table and all the stored virtual memory address may become invalid. - Setup the [query_profiler_cpu_time_period_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously. These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query. -Default sampling frequency is one sample per second and both CPU and real timers are enabled. This frequency allows to collect enough information about ClickHouse cluster. At the same time, working with this frequency, profiler doesn't affect ClickHouse server's performance. If you need to profile each individual query try to use higher sampling frequency. +The default sampling frequency is one sample per second and both CPU and real timers are enabled. This frequency allows collecting enough information about ClickHouse cluster. At the same time, working with this frequency, profiler doesn't affect ClickHouse server's performance. If you need to profile each individual query try to use higher sampling frequency. 
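For example, to sample one session roughly a hundred times per second instead of once per second, the timer periods could be lowered like this (an illustrative sketch):

```sql
SET query_profiler_real_time_period_ns = 10000000; -- one sample every 10 ms of wall-clock time
SET query_profiler_cpu_time_period_ns = 10000000;  -- one sample every 10 ms of CPU time
```

Both settings are expressed in nanoseconds, and setting a timer period to 0 disables the corresponding timer.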
To analyze the `trace_log` system table: - Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting_started/install.md#install-from-deb-packages). - Allow introspection functions by the [allow_introspection_functions](../settings/settings.md#settings-allow_introspection_functions) setting. - For security reasons introspection functions are disabled by default. + For security reasons, introspection functions are disabled by default. - Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../query_language/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. @@ -30,7 +30,7 @@ If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/thi In this example we: -- Filtering `trace_log` data by a query identifier and current date. +- Filtering `trace_log` data by a query identifier and the current date. - Aggregating by stack trace. - Using introspection functions, we will get a report of: diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 1e48b374711..27b4862e120 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -1,5 +1,4 @@ -# Server settings - +# Server Settings ## builtin_dictionaries_reload_interval @@ -206,7 +205,7 @@ The port for connecting to the server over HTTP(s). If `https_port` is specified, [openSSL](#server_settings-openssl) must be configured. -If `http_port` is specified, the openSSL configuration is ignored even if it is set. +If `http_port` is specified, the OpenSSL configuration is ignored even if it is set. **Example** @@ -218,7 +217,7 @@ If `http_port` is specified, the openSSL configuration is ignored even if it is ## http_server_default_response {#server_settings-http_server_default_response} The page that is shown by default when you access the ClickHouse HTTP(s) server. -Default value is "Ok." (with a line feed at the end) +The default value is "Ok." (with a line feed at the end) **Example** @@ -256,7 +255,7 @@ Port for exchanging data between ClickHouse servers. ## interserver_http_host -The host name that can be used by other servers to access this server. +The hostname that can be used by other servers to access this server. If omitted, it is defined in the same way as the `hostname-f` command. @@ -415,7 +414,7 @@ The maximum number of open files. By default: `maximum`. -We recommend using this option in Mac OS X, since the `getrlimit()` function returns an incorrect value. +We recommend using this option in Mac OS X since the `getrlimit()` function returns an incorrect value. **Example** @@ -474,7 +473,7 @@ Keys for server/client settings: - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. | - cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. - cacheSessions – Enables or disables caching sessions. Must be used in combination with ``sessionIdContext``. Acceptable values: `true`, `false`. -- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed ``SSL_MAX_SSL_SESSION_ID_LENGTH``. 
This parameter is always recommended, since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: ``${application.name}``. +- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed ``SSL_MAX_SSL_SESSION_ID_LENGTH``. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: ``${application.name}``. - sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. - sessionTimeout – Time for caching the session on the server. - extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. @@ -483,7 +482,7 @@ Keys for server/client settings: - requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. - fips – Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS. - privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ````, ``KeyFileHandler``, ``test``, ````. -- invalidCertificateHandler – Class (subclass of CertificateHandler) for verifying invalid certificates. For example: `` ConsoleCertificateHandler `` . +- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `` ConsoleCertificateHandler `` . - disableProtocols – Protocols that are not allowed to use. - preferServerCiphers – Preferred server ciphers on the client. @@ -634,9 +633,9 @@ The default server configuration file `config.xml` contains the following settin ## query_masking_rules Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, -`system.query_log`, `system.text_log`, `system.processes` table, and in logs sent to client. That allows preventing -sensitive data leakage from SQL queries (like names / emails / personal -identifiers / credit card numbers etc) to logs. +`system.query_log`, `system.text_log`, `system.processes` table, and in logs sent to the client. That allows preventing +sensitive data leakage from SQL queries (like names, emails, personal +identifiers or credit card numbers) to logs. **Example** @@ -655,11 +654,11 @@ Config fields: - `regexp` - RE2 compatible regular expression (mandatory) - `replace` - substitution string for sensitive data (optional, by default - six asterisks) -The masking rules are applied on whole query (to prevent leaks of sensitive data from malformed / non parsable queries). +The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries). -`system.events` table have counter `QueryMaskingRulesMatch` which have overall number of query masking rules matches. +`system.events` table have counter `QueryMaskingRulesMatch` which have an overall number of query masking rules matches. -For distributed queries each server have to be configured separately, otherwise subquries passed to other +For distributed queries each server have to be configured separately, otherwise, subqueries passed to other nodes will be stored without masking. ## remote_servers {#server_settings_remote_servers} @@ -682,9 +681,9 @@ For the value of the `incl` attribute, see the section "[Configuration files](.. 
The server's time zone. -Specified as an IANA identifier for the UTC time zone or geographic location (for example, Africa/Abidjan). +Specified as an IANA identifier for the UTC timezone or geographic location (for example, Africa/Abidjan). -The time zone is necessary for conversions between String and DateTime formats when DateTime fields are output to text format (printed on the screen or in a file), and when getting DateTime from a string. In addition, the time zone is used in functions that work with the time and date if they didn't receive the time zone in the input parameters. +The time zone is necessary for conversions between String and DateTime formats when DateTime fields are output to text format (printed on the screen or in a file), and when getting DateTime from a string. Besides, the time zone is used in functions that work with the time and date if they didn't receive the time zone in the input parameters. **Example** @@ -856,11 +855,11 @@ This setting only applies to the `MergeTree` family. It can be specified: - Globally in the [merge_tree](#server_settings-merge_tree) section of the `config.xml` file. - ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behavior when the setting changes. + ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes. -- For each individual table. +- For each table. - When creating a table, specify the corresponding [engine setting](../table_engines/mergetree.md#table_engine-mergetree-creating-a-table). The behavior of an existing table with this setting does not change, even if the global setting changes. + When creating a table, specify the corresponding [engine setting](../table_engines/mergetree.md#table_engine-mergetree-creating-a-table). The behaviour of an existing table with this setting does not change, even if the global setting changes. **Possible values** diff --git a/docs/en/operations/settings/constraints_on_settings.md b/docs/en/operations/settings/constraints_on_settings.md index 12419092acb..362272f6fe8 100644 --- a/docs/en/operations/settings/constraints_on_settings.md +++ b/docs/en/operations/settings/constraints_on_settings.md @@ -1,7 +1,7 @@ # Constraints on Settings The constraints on settings can be defined in the `profiles` section of the `user.xml` configuration file and prohibit users from changing some of the settings with the `SET` query. -The constraints are defined as following: +The constraints are defined as the following: ```xml @@ -25,8 +25,8 @@ The constraints are defined as following: ``` -If user tries to violate the constraints an exception is thrown and the setting isn't actually changed. -There are supported three types of constraints: `min`, `max`, `readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` constraint specify that the user cannot change the corresponding setting at all. +If the user tries to violate the constraints an exception is thrown and the setting isn't changed. +There are supported three types of constraints: `min`, `max`, `readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` constraint specifies that the user cannot change the corresponding setting at all. 
**Example:** Let `users.xml` includes lines: @@ -63,6 +63,6 @@ Code: 452, e.displayText() = DB::Exception: Setting max_memory_usage should not Code: 452, e.displayText() = DB::Exception: Setting force_index_by_date should not be changed. ``` -**Note:** the `default` profile has a special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until they're overriden explicitly for these users. +**Note:** the `default` profile has special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until they're overridden explicitly for these users. [Original article](https://clickhouse.tech/docs/en/operations/settings/constraints_on_settings/) diff --git a/docs/en/operations/settings/permissions_for_queries.md b/docs/en/operations/settings/permissions_for_queries.md index e6dcd490e97..0c6e86ae9fa 100644 --- a/docs/en/operations/settings/permissions_for_queries.md +++ b/docs/en/operations/settings/permissions_for_queries.md @@ -1,12 +1,12 @@ -# Permissions for queries {#permissions_for_queries} +# Permissions for Queries {#permissions_for_queries} Queries in ClickHouse can be divided into several types: 1. Read data queries: `SELECT`, `SHOW`, `DESCRIBE`, `EXISTS`. -1. Write data queries: `INSERT`, `OPTIMIZE`. -1. Change settings queries: `SET`, `USE`. -1. [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries: `CREATE`, `ALTER`, `RENAME`, `ATTACH`, `DETACH`, `DROP` `TRUNCATE`. -1. `KILL QUERY`. +2. Write data queries: `INSERT`, `OPTIMIZE`. +3. Change settings query: `SET`, `USE`. +4. [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries: `CREATE`, `ALTER`, `RENAME`, `ATTACH`, `DETACH`, `DROP` `TRUNCATE`. +5. `KILL QUERY`. The following settings regulate user permissions by the type of query: @@ -17,7 +17,7 @@ The following settings regulate user permissions by the type of query: ## readonly {#settings_readonly} -Restricts permissions for read data, write data and change settings queries. +Restricts permissions for reading data, write data and change settings queries. See how the queries are divided into types [above](#permissions_for_queries). diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 5b9db828d03..d66f2198005 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -1,10 +1,10 @@ -# Restrictions on query complexity +# Restrictions on Query Complexity Restrictions on query complexity are part of the settings. -They are used in order to provide safer execution from the user interface. +They are used to provide safer execution from the user interface. Almost all the restrictions only apply to `SELECT`. For distributed query processing, restrictions are applied on each server separately. -ClickHouse checks the restrictions for data parts, not for each row. It means that you can exceed the value of restriction with a size of the data part. +ClickHouse checks the restrictions for data parts, not for each row. It means that you can exceed the value of restriction with the size of the data part. Restrictions on the "maximum amount of something" can take the value 0, which means "unrestricted". Most restrictions also have an 'overflow_mode' setting, meaning what to do when the limit is exceeded. @@ -25,7 +25,7 @@ In the default configuration file, the maximum is 10 GB. 
The setting doesn't consider the volume of available memory or the total volume of memory on the machine. The restriction applies to a single query within a single server. You can use `SHOW PROCESSLIST` to see the current memory consumption for each query. -In addition, the peak memory consumption is tracked for each query and written to the log. +Besides, the peak memory consumption is tracked for each query and written to the log. Memory usage is not monitored for the states of certain aggregate functions. @@ -54,11 +54,11 @@ See also the description of [max_memory_usage](#settings_max_memory_usage). The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little. When running a query in multiple threads, the following restrictions apply to each thread separately. -Maximum number of rows that can be read from a table when running a query. +A maximum number of rows that can be read from a table when running a query. ## max_bytes_to_read -Maximum number of bytes (uncompressed data) that can be read from a table when running a query. +A maximum number of bytes (uncompressed data) that can be read from a table when running a query. ## read_overflow_mode @@ -66,7 +66,7 @@ What to do when the volume of data read exceeds one of the limits: 'throw' or 'b ## max_rows_to_group_by {#settings-max_rows_to_group_by} -Maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating. +A maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating. ## group_by_overflow_mode @@ -86,11 +86,11 @@ Default value: 0. ## max_rows_to_sort -Maximum number of rows before sorting. This allows you to limit memory consumption when sorting. +A maximum number of rows before sorting. This allows you to limit memory consumption when sorting. ## max_bytes_to_sort -Maximum number of bytes before sorting. +A maximum number of bytes before sorting. ## sort_overflow_mode @@ -142,15 +142,15 @@ Minimal execution speed in rows per second. Checked on every data block when 'ti ## min_execution_speed_bytes -Minimum number of execution bytes per second. Checked on every data block when 'timeout_before_checking_execution_speed' expires. If the execution speed is lower, an exception is thrown. +A minimum number of execution bytes per second. Checked on every data block when 'timeout_before_checking_execution_speed' expires. If the execution speed is lower, an exception is thrown. ## max_execution_speed -Maximum number of execution rows per second. Checked on every data block when 'timeout_before_checking_execution_speed' expires. If the execution speed is high, the execution speed will be reduced. +A maximum number of execution rows per second. Checked on every data block when 'timeout_before_checking_execution_speed' expires. If the execution speed is high, the execution speed will be reduced. ## max_execution_speed_bytes -Maximum number of execution bytes per second. Checked on every data block when 'timeout_before_checking_execution_speed' expires. If the execution speed is high, the execution speed will be reduced. +A maximum number of execution bytes per second. Checked on every data block when 'timeout_before_checking_execution_speed' expires. If the execution speed is high, the execution speed will be reduced. 
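To make the read and sort limits above concrete, here is a hedged sketch of setting a few of them for a session; the numbers are illustrative, not recommendations:

```sql
-- Illustrative values only.
SET max_rows_to_read = 100000000;   -- stop after reading roughly 100 million rows
SET read_overflow_mode = 'break';   -- return a partial result instead of throwing
SET max_rows_to_sort = 10000000;    -- cap the rows (and memory) spent on sorting
```

The same limits can also be attached to a single query with a trailing `SETTINGS` clause instead of being set for the whole session.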
## timeout_before_checking_execution_speed @@ -158,11 +158,11 @@ Checks that execution speed is not too slow (no less than 'min_execution_speed') ## max_columns_to_read -Maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception. +A maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception. ## max_temporary_columns -Maximum number of temporary columns that must be kept in RAM at the same time when running a query, including constant columns. If there are more temporary columns than this, it throws an exception. +A maximum number of temporary columns that must be kept in RAM at the same time when running a query, including constant columns. If there are more temporary columns than this, it throws an exception. ## max_temporary_non_const_columns @@ -184,16 +184,16 @@ At this time, it isn't checked during parsing, but only after parsing the query. ## max_ast_elements -Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. +A maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set -Maximum number of rows for a data set in the IN clause created from a subquery. +A maximum number of rows for a data set in the IN clause created from a subquery. ## max_bytes_in_set -Maximum number of bytes (uncompressed data) used by a set in the IN clause created from a subquery. +A maximum number of bytes (uncompressed data) used by a set in the IN clause created from a subquery. ## set_overflow_mode @@ -201,11 +201,11 @@ What to do when the amount of data exceeds one of the limits: 'throw' or 'break' ## max_rows_in_distinct -Maximum number of different rows when using DISTINCT. +A maximum number of different rows when using DISTINCT. ## max_bytes_in_distinct -Maximum number of bytes used by a hash table when using DISTINCT. +A maximum number of bytes used by a hash table when using DISTINCT. ## distinct_overflow_mode @@ -213,11 +213,11 @@ What to do when the amount of data exceeds one of the limits: 'throw' or 'break' ## max_rows_to_transfer -Maximum number of rows that can be passed to a remote server or saved in a temporary table when using GLOBAL IN. +A maximum number of rows that can be passed to a remote server or saved in a temporary table when using GLOBAL IN. ## max_bytes_to_transfer -Maximum number of bytes (uncompressed data) that can be passed to a remote server or saved in a temporary table when using GLOBAL IN. +A maximum number of bytes (uncompressed data) that can be passed to a remote server or saved in a temporary table when using GLOBAL IN. ## transfer_overflow_mode @@ -290,6 +290,6 @@ Default value: 100. When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text: -> "Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. 
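A hedged per-query sketch of the DISTINCT limits described above; the table `hits` and the column `UserID` are hypothetical names:

```sql
-- Stop accumulating distinct keys after one million rows and return a partial result.
SELECT DISTINCT UserID
FROM hits
SETTINGS max_rows_in_distinct = 1000000, distinct_overflow_mode = 'break';
```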
Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc)." +> "Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc)." [Original article](https://clickhouse.tech/docs/en/operations/settings/query_complexity/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b10553cbe77..636edc78ff1 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -41,7 +41,7 @@ Consider the following queries: 1. `SELECT count() FROM test_table WHERE date = '2018-10-10'` 2. `SELECT count() FROM (SELECT * FROM test_table) WHERE date = '2018-10-10'` -If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal, because ClickHouse applies `WHERE` to the subquery when processing it. +If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal because ClickHouse applies `WHERE` to the subquery when processing it. If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes. @@ -61,7 +61,7 @@ Disables query execution if the index can't be used by date. Works with tables in the MergeTree family. -If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see [MergeTree](../table_engines/mergetree.md). +If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see [MergeTree](../table_engines/mergetree.md). ## force_primary_key @@ -70,7 +70,7 @@ Disables query execution if indexing by the primary key is not possible. Works with tables in the MergeTree family. -If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. 
However, it does not check whether the condition actually reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../table_engines/mergetree.md). +If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../table_engines/mergetree.md). ## format_schema @@ -219,7 +219,7 @@ Ok. ## input_format_values_deduce_templates_of_expressions {#settings-input_format_values_deduce_templates_of_expressions} -Enables or disables template deduction for an SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows to parse and interpret expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce template of an expression, parse the following rows using this template and evaluate the expression on batch of successfully parsed rows. For the following query: +Enables or disables template deduction for an SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows to parse and interpret expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. For the following query: ```sql INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... @@ -240,7 +240,7 @@ This setting is used only when `input_format_values_deduce_templates_of_expressi (..., abs(-1), ...), -- Int64 literal ``` -When this setting is enabled, ClickHouse will check actual type of literal and will use expression template of the corresponding type. In some cases it may significantly slow down expression evaluation in `Values`. +When this setting is enabled, ClickHouse will check the actual type of literal and will use an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. When disabled, ClickHouse may use more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. Enabled by default. @@ -266,7 +266,7 @@ Disabled by default. ## input_format_null_as_default {#settings-input_format_null_as_default} -Enables or disables using default values if input data contain `NULL`, but data type of corresponding column in not `Nullable(T)` (for text input formats). +Enables or disables using default values if input data contain `NULL`, but data type of the corresponding column in not `Nullable(T)` (for text input formats). ## input_format_skip_unknown_fields {#settings-input_format_skip_unknown_fields} @@ -328,7 +328,7 @@ Default value: 1. ## date_time_input_format {#settings-date_time_input_format} -Allows to choose a parser of text representation of date and time. +Allows choosing a parser of the text representation of date and time. The setting doesn't apply to [date and time functions](../../query_language/functions/date_time_functions.md). 
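As a hedged example of the extended date-time parser (it assumes the `best_effort` value this setting accepts; the table `t` is hypothetical and has a `DateTime` column):

```sql
SET date_time_input_format = 'best_effort';
-- An ISO 8601 form that the relaxed parser is expected to accept.
INSERT INTO t VALUES ('2020-03-20T10:30:00Z');
```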
@@ -355,7 +355,7 @@ Sets default strictness for [JOIN clauses](../../query_language/select.md#select Possible values: -- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behavior from standard SQL. +- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL. - `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same. - `ASOF` — For joining sequences with an uncertain match. - `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception. @@ -364,7 +364,7 @@ Default value: `ALL`. ## join_any_take_last_row {#settings-join_any_take_last_row} -Changes behavior of join operations with `ANY` strictness. +Changes behaviour of join operations with `ANY` strictness. !!! warning "Attention" This setting applies only for `JOIN` operations with [Join](../table_engines/join.md) engine tables. @@ -395,7 +395,7 @@ Default value: 0. ## max_block_size {#setting-max_block_size} -In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of block (in number of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large, so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads, and to preserve at least some cache locality. +In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality. Default value: 65,536. @@ -429,7 +429,7 @@ Default value: 251658240. ## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek} -If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file, but reads the data sequentially. +If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file but reads the data sequentially. Possible values: @@ -439,7 +439,7 @@ Default value: 0. 
## merge_tree_min_bytes_for_seek {#setting-merge_tree_min_bytes_for_seek} -If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads range of file that contains both blocks, thus avoiding extra seek. +If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads a range of file that contains both blocks, thus avoiding extra seek. Possible values: @@ -450,7 +450,7 @@ Default value: 0. ## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity} -When searching data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively. +When searching for data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively. Possible values: @@ -529,7 +529,7 @@ The setting also doesn't have a purpose when using INSERT SELECT, since data is Default value: 1,048,576. -The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. +The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion and a large enough block size allow sorting more data in RAM. ## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries} @@ -599,9 +599,9 @@ Default value: 256 KiB. ## interactive_delay -The interval in microseconds for checking whether request execution has been canceled and sending the progress. +The interval in microseconds for checking whether request execution has been cancelled and sending the progress. -Default value: 100,000 (checks for canceling and sends the progress ten times per second). +Default value: 100,000 (checks for cancelling and sends the progress ten times per second). ## connect_timeout, receive_timeout, send_timeout @@ -611,7 +611,7 @@ Default value: 10, 300, 300. ## cancel_http_readonly_queries_on_client_close -Cancels HTTP readonly queries (e.g. SELECT) when a client closes the connection without waiting for response. +Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response. Default value: 0 @@ -659,18 +659,18 @@ For more information, see the section "Extreme values". Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. 
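A minimal sketch of enabling the uncompressed cache for such a workload; the query and table names are hypothetical:

```sql
SET use_uncompressed_cache = 1;                      -- session-wide, per the note above
SELECT count() FROM hits WHERE CounterID = 101500;   -- a short point query that can reuse cached blocks
```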
Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. -For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. This means that you can keep the 'use_uncompressed_cache' setting always set to 1. +For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the 'use_uncompressed_cache' setting always set to 1. ## replace_running_query When using the HTTP interface, the 'query_id' parameter can be passed. This is any string that serves as the query identifier. -If a query from the same user with the same 'query_id' already exists at this time, the behavior depends on the 'replace_running_query' parameter. +If a query from the same user with the same 'query_id' already exists at this time, the behaviour depends on the 'replace_running_query' parameter. `0` (default) – Throw an exception (don't allow the query to run if a query with the same 'query_id' is already running). `1` – Cancel the old query and start running the new one. -Yandex.Metrica uses this parameter set to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn't finished yet, it should be canceled. +Yandex.Metrica uses this parameter set to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn't finished yet, it should be cancelled. ## stream_flush_interval_ms @@ -699,7 +699,7 @@ ClickHouse supports the following algorithms of choosing replicas: load_balancing = random ``` -The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them. +The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to anyone of them. Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data. ### Nearest Hostname {#load_balancing-nearest_hostname} @@ -708,7 +708,7 @@ Disadvantages: Server proximity is not accounted for; if the replicas have diffe load_balancing = nearest_hostname ``` -The number of errors is counted for each replica. Every 5 minutes, the number of errors is integrally divided by 2. Thus, the number of errors is calculated for a recent time with exponential smoothing. If there is one replica with a minimal number of errors (i.e. errors occurred recently on the other replicas), the query is sent to it. If there are multiple replicas with the same minimal number of errors, the query is sent to the replica with a host name that is most similar to the server's host name in the config file (for the number of different characters in identical positions, up to the minimum length of both host names). +The number of errors is counted for each replica. Every 5 minutes, the number of errors is integrally divided by 2. Thus, the number of errors is calculated for a recent time with exponential smoothing. 
If there is one replica with a minimal number of errors (i.e. errors occurred recently on the other replicas), the query is sent to it. If there are multiple replicas with the same minimal number of errors, the query is sent to the replica with a hostname that is most similar to the server's hostname in the config file (for the number of different characters in identical positions, up to the minimum length of both hostnames). For instance, example01-01-1 and example01-01-2.yandex.ru are different in one position, while example01-01-1 and example01-02-2 differ in two places. This method might seem primitive, but it doesn't require external data about network topology, and it doesn't compare IP addresses, which would be complicated for our IPv6 addresses. @@ -722,7 +722,7 @@ We can also assume that when sending a query to the same server, in the absence load_balancing = in_order ``` -Replicas with the same number of errors are accessed in the same order as they are specified in configuration. +Replicas with the same number of errors are accessed in the same order as they are specified in the configuration. This method is appropriate when you know exactly which replica is preferable. @@ -734,7 +734,7 @@ load_balancing = first_or_random This algorithm chooses the first replica in the set or a random replica if the first is unavailable. It's effective in cross-replication topology setups, but useless in other configurations. -The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, load is evenly distributed among replicas that are still available. +The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available. ## prefer_localhost_replica {#settings-prefer_localhost_replica} @@ -770,7 +770,7 @@ Replica lag is not controlled. Enable compilation of queries. By default, 0 (disabled). -Compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). +The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). If this portion of the pipeline was compiled, the query may run faster due to deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. ## min_count_to_compile @@ -780,7 +780,7 @@ For testing, the value can be set to 0: compilation runs synchronously and the q If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running. Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. -The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. 
Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. +The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} @@ -796,15 +796,15 @@ For CSV input format enables or disables parsing of unquoted `NULL` as literal ( ## output_format_csv_crlf_end_of_line {#settings-output_format_csv_crlf_end_of_line} -Use DOS/Windows style line separator (CRLF) in CSV instead of Unix style (LF). +Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). ## output_format_tsv_crlf_end_of_line {#settings-output_format_tsv_crlf_end_of_line} -Use DOC/Windows style line separator (CRLF) in TSV instead of Unix style (LF). +Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). ## insert_quorum {#settings-insert_quorum} -Enables quorum writes. +Enables the quorum writes. - If `insert_quorum < 2`, the quorum writes are disabled. - If `insert_quorum >= 2`, the quorum writes are enabled. @@ -831,7 +831,7 @@ See also: ## insert_quorum_timeout {#settings-insert_quorum_timeout} -Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. +Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. Default value: 60 seconds. @@ -887,10 +887,10 @@ Default value: 0. Usage -By default, deduplication is not performed for materialized views, but is done upstream, in the source table. -If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behavior exists to enable insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table. -At the same time, this behavior "breaks" `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows to change this behavior. On retry a materialized view will receive the repeat insert and will perform deduplication check by itself, -ignoring check result for the source table, and will insert rows lost because of first failure. +By default, deduplication is not performed for materialized views but is done upstream, in the source table. +If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. 
This behaviour exists to enable insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table. +At the same time, this behaviour "breaks" `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform deduplication check by itself, +ignoring check result for the source table, and will insert rows lost because of the first failure. ## max_network_bytes {#settings-max_network_bytes} Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. @@ -981,7 +981,7 @@ Default value: 0. ## optimize_skip_unused_shards {#settings-optimize_skip_unused_shards} -Enables or disables skipping of unused shards for SELECT queries that has sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by sharding key, otherwise do nothing). +Enables or disables skipping of unused shards for SELECT queries that have sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by sharding key, otherwise do nothing). Default value: 0 @@ -1016,7 +1016,7 @@ Default value: 0. - Type: seconds - Default value: 60 seconds -Controls how fast errors in distributed tables are zeroed. If a replica is unavailabe for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after last error. +Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after last error. See also: @@ -1041,7 +1041,7 @@ Base interval for the [Distributed](../table_engines/distributed.md) table engin Possible values: -- Positive integer number of milliseconds. +- A positive integer number of milliseconds. Default value: 100 milliseconds. @@ -1051,7 +1051,7 @@ Maximum interval for the [Distributed](../table_engines/distributed.md) table en Possible values: -- Positive integer number of milliseconds. +- A positive integer number of milliseconds. Default value: 30000 milliseconds (30 seconds). @@ -1059,7 +1059,7 @@ Default value: 30000 milliseconds (30 seconds). Enables/disables sending of inserted data in batches. -When batch sending is enabled, the [Distributed](../table_engines/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better utilizing server and network resources. +When batch sending is enabled, the [Distributed](../table_engines/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better-utilizing server and network resources. 
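Putting the three Distributed send settings above together, a hedged sketch that keeps the documented defaults for the intervals and switches batching on:

```sql
SET distributed_directory_monitor_sleep_time_ms = 100;        -- base retry interval (documented default)
SET distributed_directory_monitor_max_sleep_time_ms = 30000;  -- upper bound for the interval (documented default)
SET distributed_directory_monitor_batch_inserts = 1;          -- send pending files in batches
```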
Possible values: @@ -1079,7 +1079,7 @@ Possible values: - You can set values in the range `[-20, 19]`. -Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long running non-interactive queries because it allows them to quickly give up resources in favor of short interactive queries when they arrive. +Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long-running non-interactive queries because it allows them to quickly give up resources in favour of short interactive queries when they arrive. Default value: 0. @@ -1112,11 +1112,11 @@ Sets the period for a CPU clock timer of the [query profiler](../../operations/p Possible values: -- Positive integer number of nanoseconds. +- A positive integer number of nanoseconds. Recommended values: - - 10000000 (100 times a second) nanosecods and more for for single queries. + - 10000000 (100 times a second) nanoseconds and more for single queries. - 1000000000 (once a second) for cluster-wide profiling. - 0 for turning off the timer. diff --git a/docs/en/operations/settings/settings_profiles.md b/docs/en/operations/settings/settings_profiles.md index 604206bf9da..21e1747225a 100644 --- a/docs/en/operations/settings/settings_profiles.md +++ b/docs/en/operations/settings/settings_profiles.md @@ -1,5 +1,4 @@ - -# Settings profiles +# Settings Profiles A settings profile is a collection of settings grouped under the same name. Each ClickHouse user has a profile. To apply all the settings in a profile, set the `profile` setting. diff --git a/docs/en/operations/settings/settings_users.md b/docs/en/operations/settings/settings_users.md index c30786759e6..2347be357c2 100644 --- a/docs/en/operations/settings/settings_users.md +++ b/docs/en/operations/settings/settings_users.md @@ -1,4 +1,4 @@ -# User settings +# User Settings The `users` section of the `user.xml` configuration file contains user settings. 
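A brief hedged example of applying a settings profile; the profile name `web` is assumed to be defined in `users.xml`:

```sql
SET profile = 'web';  -- applies every setting from the 'web' profile to the current session
```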
diff --git a/docs/tools/build.py b/docs/tools/build.py index 6ee7dae83e3..ac135e27690 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -87,7 +87,9 @@ def build_for_lang(lang, args): else: site_dir = os.path.join(args.docs_output_dir, lang) - plugins = ['macros', 'search'] + plugins = ['search'] + if not args.no_docs_macros: + plugins.append('macros') if args.htmlproofer: plugins.append('htmlproofer') @@ -312,6 +314,7 @@ if __name__ == '__main__': arg_parser.add_argument('--skip-website', action='store_true') arg_parser.add_argument('--minify', action='store_true') arg_parser.add_argument('--htmlproofer', action='store_true') + arg_parser.add_argument('--no-docs-macros', action='store_true') arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument('--livereload', type=int, default='0') arg_parser.add_argument('--verbose', action='store_true') diff --git a/docs/tools/github.py b/docs/tools/github.py index c6ff7f19350..4a02981b4b3 100644 --- a/docs/tools/github.py +++ b/docs/tools/github.py @@ -6,6 +6,7 @@ import os import sys import tarfile +import jinja2 import requests import util @@ -51,7 +52,11 @@ def process_release(args, callback, release): args.version_prefix = name args.is_stable_release = True args.docs_dir = os.path.join(base_dir, os.listdir(base_dir)[0], 'docs') - callback(args) + try: + callback(args) + except jinja2.exceptions.TemplateSyntaxError: + args.no_docs_macros = True + callback(args) def build_releases(args, callback): From c4553f2f7b899b148ff3a745662b50aafed68b08 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 11:36:37 +0300 Subject: [PATCH 094/115] Reuse correct function --- .../getDictionaryConfigurationFromAST.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index daa3b1a272d..2fa7a98cbd3 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -1,6 +1,7 @@ #include -#include +#include +#include #include #include #include @@ -32,16 +33,16 @@ namespace using NamesToTypeNames = std::unordered_map; /// Get value from field and convert it to string. /// Also remove quotes from strings. 
-String getUnescapedFieldString(const Field & field) +String getUnquotedFieldString(const Field & field) { String string = applyVisitor(FieldVisitorToString(), field); - - if (!string.empty() && string.front() == '\'' && string.back() == '\'') - string = string.substr(1, string.size() - 2); - - /// Escaping will be performed on dictionary providers side - boost::replace_all(string, "\\'", "'"); - boost::replace_all(string, "\\\\", "\\"); + if (string.front() == '\'') + { + String result; + ReadBufferFromString buf(string); + readQuotedString(result, buf); + return result; + } return string; } @@ -189,7 +190,7 @@ void buildSingleAttribute( AutoPtr null_value_element(doc->createElement("null_value")); String null_value_str; if (dict_attr->default_value) - null_value_str = getUnescapedFieldString(dict_attr->default_value->as()->value); + null_value_str = getUnquotedFieldString(dict_attr->default_value->as()->value); AutoPtr null_value(doc->createTextNode(null_value_str)); null_value_element->appendChild(null_value); attribute_element->appendChild(null_value_element); @@ -203,7 +204,7 @@ void buildSingleAttribute( if (const auto * literal = dict_attr->expression->as(); literal && literal->value.getType() == Field::Types::String) { - expression_str = getUnescapedFieldString(literal->value); + expression_str = getUnquotedFieldString(literal->value); } else expression_str = queryToString(dict_attr->expression); @@ -352,7 +353,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments( } else if (auto literal = pair->second->as(); literal) { - AutoPtr value(doc->createTextNode(getUnescapedFieldString(literal->value))); + AutoPtr value(doc->createTextNode(getUnquotedFieldString(literal->value))); current_xml_element->appendChild(value); } else if (auto list = pair->second->as(); list) From 571d0d541ccf2b6c00c8b43272e21eb3fb0eaf68 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Thu, 19 Mar 2020 13:38:34 +0300 Subject: [PATCH 095/115] Improve CCTZ contrib (#9687) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * common/Types.h → common/types.h Also split Core/Defines.h and merge with common/likely.h * Improve cctz contrib * Fix ALWAYS_INLINE and unbundled build * Update Dockerfile from master * Fix test for unbundled library --- CMakeLists.txt | 1 - base/common/CMakeLists.txt | 11 +-- base/common/DateLUT.cpp | 7 +- base/common/DateLUT.h | 19 ++-- base/common/DateLUTImpl.cpp | 17 +--- base/common/DateLUTImpl.h | 5 +- base/common/DayNum.h | 2 +- base/common/JSON.h | 2 +- base/common/LineReader.h | 2 +- base/common/StringRef.h | 2 +- base/common/defines.h | 87 +++++++++++++++++++ base/common/itoa.h | 1 - base/common/likely.h | 15 ---- base/common/memory.h | 28 ++---- base/common/shift10.cpp | 2 +- base/common/shift10.h | 2 +- base/common/terminalColors.h | 2 +- base/common/{Types.h => types.h} | 0 base/daemon/BaseDaemon.h | 2 +- base/mysqlxx/Value.h | 2 +- cmake/find/cctz.cmake | 23 ----- contrib/CMakeLists.txt | 4 +- contrib/cctz-cmake/CMakeLists.txt | 69 +++++++++------ .../AggregateFunctionGroupArray.h | 1 - dbms/src/AggregateFunctions/QuantileTiming.h | 1 - .../src/AggregateFunctions/ReservoirSampler.h | 2 +- .../ReservoirSamplerDeterministic.h | 2 +- dbms/src/AggregateFunctions/UniquesHashSet.h | 2 +- dbms/src/Common/Arena.h | 1 - dbms/src/Common/ConcurrentBoundedQueue.h | 2 +- dbms/src/Common/CounterInFile.h | 2 +- dbms/src/Common/CurrentThread.cpp | 1 - dbms/src/Common/CurrentThread.h | 2 - 
dbms/src/Common/HashTable/HashTable.h | 2 - dbms/src/Common/HyperLogLogCounter.h | 2 +- dbms/src/Common/MemoryTracker.cpp | 1 - dbms/src/Common/MemoryTracker.h | 2 +- dbms/src/Common/PODArray.h | 1 - dbms/src/Common/PoolWithFailoverBase.h | 2 +- dbms/src/Common/SimpleIncrement.h | 2 +- dbms/src/Common/SipHash.h | 2 +- dbms/src/Common/SmallObjectPool.h | 1 - dbms/src/Common/Stopwatch.h | 2 +- dbms/src/Common/ZooKeeper/Types.h | 2 +- dbms/src/Common/formatIPv6.h | 2 +- dbms/src/Common/hex.h | 2 +- dbms/src/Common/isLocalAddress.h | 2 +- dbms/src/Common/new_delete.cpp | 6 +- dbms/src/Common/parseAddress.h | 2 +- dbms/src/Common/tests/gtest_rw_lock.cpp | 2 +- .../src/Compression/LZ4_decompress_faster.cpp | 3 +- dbms/src/Core/Defines.h | 76 +--------------- dbms/src/Core/Types.h | 2 +- dbms/src/DataStreams/SizeLimits.h | 2 +- dbms/src/DataTypes/DataTypeDecimalBase.h | 1 - .../Dictionaries/DictionarySourceHelpers.h | 2 +- .../Embedded/GeodataProviders/Types.h | 2 +- .../Dictionaries/Embedded/RegionsHierarchy.h | 2 +- dbms/src/Dictionaries/Embedded/RegionsNames.h | 2 +- dbms/src/Functions/DivisionUtils.h | 1 - .../Functions/FunctionsConsistentHashing.h | 1 - dbms/src/IO/MemoryReadWriteBuffer.cpp | 1 - dbms/src/IO/Progress.h | 2 +- dbms/src/IO/WriteIntText.h | 1 - dbms/src/IO/readFloatText.h | 1 - dbms/src/IO/tests/parse_int_perf.cpp | 2 +- dbms/src/Processors/Port.h | 1 - docker/test/stateless/Dockerfile | 2 +- utils/compressor/decompress_perf.cpp | 3 +- utils/compressor/mutator.cpp | 3 +- utils/fill-factor/main.cpp | 2 +- 71 files changed, 203 insertions(+), 265 deletions(-) create mode 100644 base/common/defines.h delete mode 100644 base/common/likely.h rename base/common/{Types.h => types.h} (100%) delete mode 100644 cmake/find/cctz.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 16993fdd9c7..3c787401f22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -354,7 +354,6 @@ endif () # Need to process before "contrib" dir: include (cmake/find/jemalloc.cmake) -include (cmake/find/cctz.cmake) include (cmake/find/mysqlclient.cmake) # When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc. diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt index 41e99768953..7d332959617 100644 --- a/base/common/CMakeLists.txt +++ b/base/common/CMakeLists.txt @@ -41,10 +41,6 @@ endif () target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..) 
-if(CCTZ_INCLUDE_DIR) - target_include_directories(common BEFORE PRIVATE ${CCTZ_INCLUDE_DIR}) -endif() - if (NOT USE_INTERNAL_BOOST_LIBRARY) target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) endif () @@ -53,10 +49,6 @@ if(NOT USE_INTERNAL_POCO_LIBRARY) target_include_directories (common SYSTEM BEFORE PUBLIC ${Poco_Foundation_INCLUDE_DIR}) endif() -if(CCTZ_LIBRARY) - target_link_libraries(common PRIVATE ${CCTZ_LIBRARY}) -endif() - # allow explicitly fallback to readline if (NOT ENABLE_REPLXX AND ENABLE_READLINE) message (STATUS "Attempt to fallback to readline explicitly") @@ -88,6 +80,9 @@ target_link_libraries (common ${Boost_SYSTEM_LIBRARY} FastMemcpy replxx + + PRIVATE + cctz ) if (ENABLE_TESTS) diff --git a/base/common/DateLUT.cpp b/base/common/DateLUT.cpp index 8db1458d00f..750900493aa 100644 --- a/base/common/DateLUT.cpp +++ b/base/common/DateLUT.cpp @@ -1,9 +1,10 @@ -#include +#include "DateLUT.h" -#include +#include #include #include -#include + +#include #include diff --git a/base/common/DateLUT.h b/base/common/DateLUT.h index 3cb935bc553..93c6cb403e2 100644 --- a/base/common/DateLUT.h +++ b/base/common/DateLUT.h @@ -1,20 +1,15 @@ #pragma once #include "DateLUTImpl.h" -#include -#include -#include -#include + +#include "defines.h" + #include -// Also defined in Core/Defines.h -#if !defined(ALWAYS_INLINE) -#if defined(_MSC_VER) - #define ALWAYS_INLINE __forceinline -#else - #define ALWAYS_INLINE __attribute__((__always_inline__)) -#endif -#endif +#include +#include +#include +#include /// This class provides lazy initialization and lookup of singleton DateLUTImpl objects for a given timezone. diff --git a/base/common/DateLUTImpl.cpp b/base/common/DateLUTImpl.cpp index 080f8fb6395..d7ab0046992 100644 --- a/base/common/DateLUTImpl.cpp +++ b/base/common/DateLUTImpl.cpp @@ -1,23 +1,14 @@ -#if __has_include() -#include // bundled, debian -#else -#include // freebsd -#endif +#include "DateLUTImpl.h" -#if __has_include() +#include #include -#else -#include -#endif - -#include #include -#include +#include #include #include -#include #include +#include #define DATE_LUT_MIN 0 diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index 2f2e431f950..d9d27c56ee3 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -1,8 +1,9 @@ #pragma once -#include "Types.h" #include "DayNum.h" -#include "likely.h" +#include "defines.h" +#include "types.h" + #include #include diff --git a/base/common/DayNum.h b/base/common/DayNum.h index 904a9281d64..a4ef0c43b69 100644 --- a/base/common/DayNum.h +++ b/base/common/DayNum.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include /** Represents number of days since 1970-01-01. diff --git a/base/common/JSON.h b/base/common/JSON.h index 5f3d9325626..7039036eeb3 100644 --- a/base/common/JSON.h +++ b/base/common/JSON.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include /** Очень простой класс для чтения JSON (или его кусочков). 
diff --git a/base/common/LineReader.h b/base/common/LineReader.h index 06f737a860b..044c44b2dcb 100644 --- a/base/common/LineReader.h +++ b/base/common/LineReader.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/base/common/StringRef.h b/base/common/StringRef.h index 54010f15085..961aab58980 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/base/common/defines.h b/base/common/defines.h new file mode 100644 index 00000000000..af5981023ff --- /dev/null +++ b/base/common/defines.h @@ -0,0 +1,87 @@ +#pragma once + +#if defined(_MSC_VER) +# if !defined(likely) +# define likely(x) (x) +# endif +# if !defined(unlikely) +# define unlikely(x) (x) +# endif +#else +# if !defined(likely) +# define likely(x) (__builtin_expect(!!(x), 1)) +# endif +# if !defined(unlikely) +# define unlikely(x) (__builtin_expect(!!(x), 0)) +# endif +#endif + +#if defined(_MSC_VER) +# define ALWAYS_INLINE __forceinline +# define NO_INLINE static __declspec(noinline) +# define MAY_ALIAS +#else +# define ALWAYS_INLINE __attribute__((__always_inline__)) +# define NO_INLINE __attribute__((__noinline__)) +# define MAY_ALIAS __attribute__((__may_alias__)) +#endif + +#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) +# error "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)" +#endif + +/// Check for presence of address sanitizer +#if !defined(ADDRESS_SANITIZER) +# if defined(__has_feature) +# if __has_feature(address_sanitizer) +# define ADDRESS_SANITIZER 1 +# endif +# elif defined(__SANITIZE_ADDRESS__) +# define ADDRESS_SANITIZER 1 +# endif +#endif + +#if !defined(THREAD_SANITIZER) +# if defined(__has_feature) +# if __has_feature(thread_sanitizer) +# define THREAD_SANITIZER 1 +# endif +# elif defined(__SANITIZE_THREAD__) +# define THREAD_SANITIZER 1 +# endif +#endif + +#if !defined(MEMORY_SANITIZER) +# if defined(__has_feature) +# if __has_feature(memory_sanitizer) +# define MEMORY_SANITIZER 1 +# endif +# elif defined(__MEMORY_SANITIZER__) +# define MEMORY_SANITIZER 1 +# endif +#endif + +/// TODO: Strange enough, there is no way to detect UB sanitizer. + +/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. +/// It is useful in case when compiler cannot see (and exploit) it, but UBSan can. +/// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. +#if defined(__clang__) +# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) +# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) +# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) +#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. +# define NO_SANITIZE_UNDEFINED +# define NO_SANITIZE_ADDRESS +# define NO_SANITIZE_THREAD +#endif + +#if defined __GNUC__ && !defined __clang__ +# define OPTIMIZE(x) __attribute__((__optimize__(x))) +#else +# define OPTIMIZE(x) +#endif + +/// A macro for suppressing warnings about unused variables or function results. +/// Useful for structured bindings which have no standard way to declare this. +#define UNUSED(...) 
(void)(__VA_ARGS__) diff --git a/base/common/itoa.h b/base/common/itoa.h index a29befd9c6d..5d660ca4378 100644 --- a/base/common/itoa.h +++ b/base/common/itoa.h @@ -30,7 +30,6 @@ #include #include #include -#include "likely.h" using int128_t = __int128; using uint128_t = unsigned __int128; diff --git a/base/common/likely.h b/base/common/likely.h deleted file mode 100644 index 338498af35f..00000000000 --- a/base/common/likely.h +++ /dev/null @@ -1,15 +0,0 @@ -#if defined(_MSC_VER) -# if !defined(likely) -# define likely(x) (x) -# endif -# if !defined(unlikely) -# define unlikely(x) (x) -# endif -#else -# if !defined(likely) -# define likely(x) (__builtin_expect(!!(x), 1)) -# endif -# if !defined(unlikely) -# define unlikely(x) (__builtin_expect(!!(x), 0)) -# endif -#endif diff --git a/base/common/memory.h b/base/common/memory.h index ab96cb593b9..e2bd8d618da 100644 --- a/base/common/memory.h +++ b/base/common/memory.h @@ -1,7 +1,7 @@ #pragma once #include -#include "likely.h" +#include "defines.h" #if __has_include() #include @@ -19,27 +19,11 @@ #include #endif -// Also defined in Core/Defines.h -#if !defined(ALWAYS_INLINE) -#if defined(_MSC_VER) - #define ALWAYS_INLINE inline __forceinline -#else - #define ALWAYS_INLINE inline __attribute__((__always_inline__)) -#endif -#endif - -#if !defined(NO_INLINE) -#if defined(_MSC_VER) - #define NO_INLINE static __declspec(noinline) -#else - #define NO_INLINE __attribute__((__noinline__)) -#endif -#endif namespace Memory { -ALWAYS_INLINE void * newImpl(std::size_t size) +inline ALWAYS_INLINE void * newImpl(std::size_t size) { auto * ptr = malloc(size); if (likely(ptr != nullptr)) @@ -49,19 +33,19 @@ ALWAYS_INLINE void * newImpl(std::size_t size) throw std::bad_alloc{}; } -ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept +inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept { return malloc(size); } -ALWAYS_INLINE void deleteImpl(void * ptr) noexcept +inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept { free(ptr); } #if USE_JEMALLOC -ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept +inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) return; @@ -71,7 +55,7 @@ ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept #else -ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept +inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept { free(ptr); } diff --git a/base/common/shift10.cpp b/base/common/shift10.cpp index 45f5733bd76..341ced6aa81 100644 --- a/base/common/shift10.cpp +++ b/base/common/shift10.cpp @@ -1,6 +1,6 @@ #include -#include +#include "defines.h" #include diff --git a/base/common/shift10.h b/base/common/shift10.h index 68c18f34450..c50121cfb27 100644 --- a/base/common/shift10.h +++ b/base/common/shift10.h @@ -1,6 +1,6 @@ #pragma once -#include +#include /** Almost the same as x = x * exp10(exponent), but gives more accurate result. * Example: diff --git a/base/common/terminalColors.h b/base/common/terminalColors.h index 0c33b23752b..a1ba85dc8cd 100644 --- a/base/common/terminalColors.h +++ b/base/common/terminalColors.h @@ -1,5 +1,5 @@ #include -#include +#include /** Set color in terminal based on 64-bit hash value. 
diff --git a/base/common/Types.h b/base/common/types.h similarity index 100% rename from base/common/Types.h rename to base/common/types.h diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 881c711d1df..f55dbb2a549 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/base/mysqlxx/Value.h b/base/mysqlxx/Value.h index 2b3465d52d1..9fdb33a442d 100644 --- a/base/mysqlxx/Value.h +++ b/base/mysqlxx/Value.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/cmake/find/cctz.cmake b/cmake/find/cctz.cmake deleted file mode 100644 index aae8078512d..00000000000 --- a/cmake/find/cctz.cmake +++ /dev/null @@ -1,23 +0,0 @@ -option (USE_INTERNAL_CCTZ_LIBRARY "Set to FALSE to use system cctz library instead of bundled" ${NOT_UNBUNDLED}) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cctz/include/cctz/time_zone.h") - if(USE_INTERNAL_CCTZ_LIBRARY) - message(WARNING "submodule contrib/cctz is missing. to fix try run: \n git submodule update --init --recursive") - endif() - set(USE_INTERNAL_CCTZ_LIBRARY 0) - set(MISSING_INTERNAL_CCTZ_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_CCTZ_LIBRARY) - find_library (CCTZ_LIBRARY cctz) - find_path (CCTZ_INCLUDE_DIR NAMES cctz/civil_time.h civil_time.h PATHS ${CCTZ_INCLUDE_PATHS}) -endif () - -if (CCTZ_LIBRARY AND CCTZ_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_CCTZ_LIBRARY) - set (USE_INTERNAL_CCTZ_LIBRARY 1) - set (CCTZ_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz/include") - set (CCTZ_LIBRARY cctz) -endif () - -message (STATUS "Using cctz: ${CCTZ_INCLUDE_DIR} : ${CCTZ_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index bc9c2528fb0..5dab20de6d3 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -75,9 +75,7 @@ if (USE_INTERNAL_ZLIB_LIBRARY) endif () endif () -if (USE_INTERNAL_CCTZ_LIBRARY) - add_subdirectory (cctz-cmake) -endif () +add_subdirectory (cctz-cmake) if (ENABLE_JEMALLOC AND USE_INTERNAL_JEMALLOC_LIBRARY) add_subdirectory (jemalloc-cmake) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 9c2f6d9a658..5770b1ef09c 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,31 +1,50 @@ -SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cctz) +option (USE_INTERNAL_CCTZ "Use internal cctz library" ${NOT_UNBUNDLED}) -add_library(cctz - ${LIBRARY_DIR}/src/civil_time_detail.cc - ${LIBRARY_DIR}/src/time_zone_fixed.cc - ${LIBRARY_DIR}/src/time_zone_format.cc - ${LIBRARY_DIR}/src/time_zone_if.cc - ${LIBRARY_DIR}/src/time_zone_impl.cc - ${LIBRARY_DIR}/src/time_zone_info.cc - ${LIBRARY_DIR}/src/time_zone_libc.cc - ${LIBRARY_DIR}/src/time_zone_lookup.cc - ${LIBRARY_DIR}/src/time_zone_posix.cc - ${LIBRARY_DIR}/src/zone_info_source.cc +if (USE_INTERNAL_CCTZ) + SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cctz) - ${LIBRARY_DIR}/src/time_zone_libc.h - ${LIBRARY_DIR}/src/time_zone_if.h - ${LIBRARY_DIR}/src/tzfile.h - ${LIBRARY_DIR}/src/time_zone_impl.h - ${LIBRARY_DIR}/src/time_zone_posix.h - ${LIBRARY_DIR}/src/time_zone_info.h + SET (SRCS + ${LIBRARY_DIR}/src/civil_time_detail.cc + ${LIBRARY_DIR}/src/time_zone_fixed.cc + ${LIBRARY_DIR}/src/time_zone_format.cc + ${LIBRARY_DIR}/src/time_zone_if.cc + ${LIBRARY_DIR}/src/time_zone_impl.cc + ${LIBRARY_DIR}/src/time_zone_info.cc + ${LIBRARY_DIR}/src/time_zone_libc.cc + ${LIBRARY_DIR}/src/time_zone_lookup.cc + 
${LIBRARY_DIR}/src/time_zone_posix.cc + ${LIBRARY_DIR}/src/zone_info_source.cc + ) - ${LIBRARY_DIR}/include/cctz/time_zone.h - ${LIBRARY_DIR}/include/cctz/civil_time_detail.h - ${LIBRARY_DIR}/include/cctz/civil_time.h) + add_library (cctz ${SRCS}) + target_include_directories (cctz PUBLIC ${LIBRARY_DIR}/include) -if (CMAKE_SYSTEM MATCHES "FreeBSD") - # yes, need linux, because bsd check inside linux in time_zone_libc.cc:24 - target_compile_definitions (cctz PRIVATE __USE_BSD linux _XOPEN_SOURCE=600) + if (OS_FREEBSD) + # yes, need linux, because bsd check inside linux in time_zone_libc.cc:24 + target_compile_definitions (cctz PRIVATE __USE_BSD linux _XOPEN_SOURCE=600) + endif () +else () + find_library (LIBRARY_CCTZ cctz) + find_path (INCLUDE_CCTZ NAMES cctz/civil_time.h) + + add_library (cctz UNKNOWN IMPORTED) + set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ}) + set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ}) + + set(CMAKE_REQUIRED_LIBRARIES cctz) + check_cxx_source_compiles( + " + #include + int main() { + cctz::civil_day date; + } + " + EXTERNAL_CCTZ_WORKS + ) + + if (NOT EXTERNAL_CCTZ_WORKS) + message (FATAL_ERROR "cctz is unusable: ${LIBRARY_CCTZ} ${INCLUDE_CCTZ}") + endif () endif () -target_include_directories (cctz PUBLIC ${LIBRARY_DIR}/include) +message (STATUS "Using cctz") diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h index afee81a8d66..2d345cff1f7 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -17,7 +17,6 @@ #include #include -#include #define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE 0xFFFFFF diff --git a/dbms/src/AggregateFunctions/QuantileTiming.h b/dbms/src/AggregateFunctions/QuantileTiming.h index 8dde3dd5383..d7f425ee2d7 100644 --- a/dbms/src/AggregateFunctions/QuantileTiming.h +++ b/dbms/src/AggregateFunctions/QuantileTiming.h @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index 648707ae940..b61027ce692 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h index 52d0181fce1..a520b8236b7 100644 --- a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/AggregateFunctions/UniquesHashSet.h b/dbms/src/AggregateFunctions/UniquesHashSet.h index d1df7b0df0d..13c59cd5225 100644 --- a/dbms/src/AggregateFunctions/UniquesHashSet.h +++ b/dbms/src/AggregateFunctions/UniquesHashSet.h @@ -2,7 +2,7 @@ #include -#include +#include #include #include diff --git a/dbms/src/Common/Arena.h b/dbms/src/Common/Arena.h index feee0de0f76..e1556ef73c5 100644 --- a/dbms/src/Common/Arena.h +++ b/dbms/src/Common/Arena.h @@ -4,7 +4,6 @@ #include #include #include -#include #if __has_include() # include #endif diff --git a/dbms/src/Common/ConcurrentBoundedQueue.h b/dbms/src/Common/ConcurrentBoundedQueue.h index 69034c512a0..b888d68a286 100644 --- 
a/dbms/src/Common/ConcurrentBoundedQueue.h +++ b/dbms/src/Common/ConcurrentBoundedQueue.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace detail diff --git a/dbms/src/Common/CounterInFile.h b/dbms/src/Common/CounterInFile.h index 1f483ff5f40..3ede8349a17 100644 --- a/dbms/src/Common/CounterInFile.h +++ b/dbms/src/Common/CounterInFile.h @@ -16,7 +16,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index ba2f28a8fe9..884cf2bfa9c 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -2,7 +2,6 @@ #include "CurrentThread.h" #include -#include #include #include #include diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 1e0140c6330..de1ad969bc4 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -107,4 +106,3 @@ private: }; } - diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 5521cc043ad..ea85076fa5f 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -8,8 +8,6 @@ #include -#include - #include #include #include diff --git a/dbms/src/Common/HyperLogLogCounter.h b/dbms/src/Common/HyperLogLogCounter.h index 82eed74de89..0acffaaaf33 100644 --- a/dbms/src/Common/HyperLogLogCounter.h +++ b/dbms/src/Common/HyperLogLogCounter.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp index 084ed1af3cc..4696d3973e4 100644 --- a/dbms/src/Common/MemoryTracker.cpp +++ b/dbms/src/Common/MemoryTracker.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Common/MemoryTracker.h b/dbms/src/Common/MemoryTracker.h index 7a203bcdf52..98f416cec40 100644 --- a/dbms/src/Common/MemoryTracker.h +++ b/dbms/src/Common/MemoryTracker.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index 272cbdc4fe6..8fe1f74484e 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -8,7 +8,6 @@ #include -#include #include #include diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h index 35f9b1b136b..a923088db3d 100644 --- a/dbms/src/Common/PoolWithFailoverBase.h +++ b/dbms/src/Common/PoolWithFailoverBase.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Common/SimpleIncrement.h b/dbms/src/Common/SimpleIncrement.h index 29e0010b0fa..a91ef06d45b 100644 --- a/dbms/src/Common/SimpleIncrement.h +++ b/dbms/src/Common/SimpleIncrement.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/dbms/src/Common/SipHash.h b/dbms/src/Common/SipHash.h index 023a1d4b18c..6497cd82428 100644 --- a/dbms/src/Common/SipHash.h +++ b/dbms/src/Common/SipHash.h @@ -13,7 +13,7 @@ * (~ 700 MB/sec, 15 million strings per second) */ -#include +#include #include #include #include diff --git a/dbms/src/Common/SmallObjectPool.h b/dbms/src/Common/SmallObjectPool.h index 4eaf6cd26ab..469c102bdae 100644 --- a/dbms/src/Common/SmallObjectPool.h +++ b/dbms/src/Common/SmallObjectPool.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h 
index 502cd2e9010..772caa75373 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include diff --git a/dbms/src/Common/ZooKeeper/Types.h b/dbms/src/Common/ZooKeeper/Types.h index 8577061c1ea..33be2853068 100644 --- a/dbms/src/Common/ZooKeeper/Types.h +++ b/dbms/src/Common/ZooKeeper/Types.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/dbms/src/Common/formatIPv6.h b/dbms/src/Common/formatIPv6.h index 2e687414224..2529d6dc796 100644 --- a/dbms/src/Common/formatIPv6.h +++ b/dbms/src/Common/formatIPv6.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/dbms/src/Common/hex.h b/dbms/src/Common/hex.h index 81fa725e17d..db094e1dfd1 100644 --- a/dbms/src/Common/hex.h +++ b/dbms/src/Common/hex.h @@ -21,7 +21,7 @@ inline char hexDigitLowercase(unsigned char c) #include #include -#include +#include /// Maps 0..255 to 00..FF or 00..ff correspondingly diff --git a/dbms/src/Common/isLocalAddress.h b/dbms/src/Common/isLocalAddress.h index 63de5e000a9..3d0db2d9550 100644 --- a/dbms/src/Common/isLocalAddress.h +++ b/dbms/src/Common/isLocalAddress.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/dbms/src/Common/new_delete.cpp b/dbms/src/Common/new_delete.cpp index 9791a53470c..0aa5f8aacce 100644 --- a/dbms/src/Common/new_delete.cpp +++ b/dbms/src/Common/new_delete.cpp @@ -19,7 +19,7 @@ namespace Memory { -ALWAYS_INLINE void trackMemory(std::size_t size) +inline ALWAYS_INLINE void trackMemory(std::size_t size) { #if USE_JEMALLOC /// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function @@ -31,7 +31,7 @@ ALWAYS_INLINE void trackMemory(std::size_t size) #endif } -ALWAYS_INLINE bool trackMemoryNoExcept(std::size_t size) noexcept +inline ALWAYS_INLINE bool trackMemoryNoExcept(std::size_t size) noexcept { try { @@ -45,7 +45,7 @@ ALWAYS_INLINE bool trackMemoryNoExcept(std::size_t size) noexcept return true; } -ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept +inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept { try { diff --git a/dbms/src/Common/parseAddress.h b/dbms/src/Common/parseAddress.h index f7357b60cd4..602a9adc0b2 100644 --- a/dbms/src/Common/parseAddress.h +++ b/dbms/src/Common/parseAddress.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/src/Common/tests/gtest_rw_lock.cpp b/dbms/src/Common/tests/gtest_rw_lock.cpp index 81bc0d38a56..dec4c732fd5 100644 --- a/dbms/src/Common/tests/gtest_rw_lock.cpp +++ b/dbms/src/Common/tests/gtest_rw_lock.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Compression/LZ4_decompress_faster.cpp b/dbms/src/Compression/LZ4_decompress_faster.cpp index 989b34b97bf..801b4925013 100644 --- a/dbms/src/Compression/LZ4_decompress_faster.cpp +++ b/dbms/src/Compression/LZ4_decompress_faster.cpp @@ -6,8 +6,7 @@ #include #include #include -#include -#include +#include #include #ifdef __SSE2__ diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index ce3a8122ead..f2d4a517712 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -1,5 +1,7 @@ #pragma once +#include + #define DBMS_DEFAULT_HOST "localhost" #define DBMS_DEFAULT_PORT 9000 #define DBMS_DEFAULT_SECURE_PORT 9440 @@ -81,76 +83,6 @@ // 
more aliases: https://mailman.videolan.org/pipermail/x264-devel/2014-May/010660.html -#if defined(_MSC_VER) - #define ALWAYS_INLINE __forceinline - #define NO_INLINE static __declspec(noinline) - #define MAY_ALIAS -#else - #define ALWAYS_INLINE __attribute__((__always_inline__)) - #define NO_INLINE __attribute__((__noinline__)) - #define MAY_ALIAS __attribute__((__may_alias__)) -#endif - - -#define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)" - -#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) - #error PLATFORM_NOT_SUPPORTED -#endif - -/// Check for presence of address sanitizer -#if !defined(ADDRESS_SANITIZER) -#if defined(__has_feature) - #if __has_feature(address_sanitizer) - #define ADDRESS_SANITIZER 1 - #endif -#elif defined(__SANITIZE_ADDRESS__) - #define ADDRESS_SANITIZER 1 -#endif -#endif - -#if !defined(THREAD_SANITIZER) -#if defined(__has_feature) - #if __has_feature(thread_sanitizer) - #define THREAD_SANITIZER 1 - #endif -#elif defined(__SANITIZE_THREAD__) - #define THREAD_SANITIZER 1 -#endif -#endif - -#if !defined(MEMORY_SANITIZER) -#if defined(__has_feature) - #if __has_feature(memory_sanitizer) - #define MEMORY_SANITIZER 1 - #endif -#elif defined(__MEMORY_SANITIZER__) - #define MEMORY_SANITIZER 1 -#endif -#endif - -/// TODO Strange enough, there is no way to detect UB sanitizer. - -/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. -/// It is useful in case when compiler cannot see (and exploit) it, but UBSan can. -/// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. -#if defined(__clang__) - #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) - #define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) - #define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -#else - /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. - #define NO_SANITIZE_UNDEFINED - #define NO_SANITIZE_ADDRESS - #define NO_SANITIZE_THREAD -#endif - -#if defined __GNUC__ && !defined __clang__ - #define OPTIMIZE(x) __attribute__((__optimize__(x))) -#else - #define OPTIMIZE(x) -#endif - /// Marks that extra information is sent to a shard. It could be any magic numbers. #define DBMS_DISTRIBUTED_SIGNATURE_HEADER 0xCAFEDACEull #define DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT 0xCAFECABEull @@ -159,7 +91,3 @@ # define ASAN_UNPOISON_MEMORY_REGION(a, b) # define ASAN_POISON_MEMORY_REGION(a, b) #endif - -/// A macro for suppressing warnings about unused variables or function results. -/// Useful for structured bindings which have no standard way to declare this. -#define UNUSED(...) 
(void)(__VA_ARGS__) diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index a7863511e69..208da48797e 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/src/DataStreams/SizeLimits.h b/dbms/src/DataStreams/SizeLimits.h index 1779831a86f..48fd2b9343f 100644 --- a/dbms/src/DataStreams/SizeLimits.h +++ b/dbms/src/DataStreams/SizeLimits.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.h b/dbms/src/DataTypes/DataTypeDecimalBase.h index bb5b0ca4ee8..2cf73467644 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.h +++ b/dbms/src/DataTypes/DataTypeDecimalBase.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/dbms/src/Dictionaries/DictionarySourceHelpers.h b/dbms/src/Dictionaries/DictionarySourceHelpers.h index e3d7bd521c3..99d5df5bda7 100644 --- a/dbms/src/Dictionaries/DictionarySourceHelpers.h +++ b/dbms/src/Dictionaries/DictionarySourceHelpers.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/Types.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/Types.h index e823ce23e0b..5f291ee7abd 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/Types.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/Types.h @@ -1,6 +1,6 @@ #pragma once -#include +#include using RegionID = UInt32; diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h index 63f5ce99a59..25625284e32 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h +++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include "GeodataProviders/IHierarchiesProvider.h" diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.h b/dbms/src/Dictionaries/Embedded/RegionsNames.h index 1b00fd508d2..7216f238156 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsNames.h +++ b/dbms/src/Dictionaries/Embedded/RegionsNames.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "GeodataProviders/INamesProvider.h" diff --git a/dbms/src/Functions/DivisionUtils.h b/dbms/src/Functions/DivisionUtils.h index 9bf153d4d6e..df3b86f721d 100644 --- a/dbms/src/Functions/DivisionUtils.h +++ b/dbms/src/Functions/DivisionUtils.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/dbms/src/Functions/FunctionsConsistentHashing.h b/dbms/src/Functions/FunctionsConsistentHashing.h index 7ddf1c676ff..59bc8e2f521 100644 --- a/dbms/src/Functions/FunctionsConsistentHashing.h +++ b/dbms/src/Functions/FunctionsConsistentHashing.h @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/IO/MemoryReadWriteBuffer.cpp b/dbms/src/IO/MemoryReadWriteBuffer.cpp index f9b5547bb13..0b0d9704de6 100644 --- a/dbms/src/IO/MemoryReadWriteBuffer.cpp +++ b/dbms/src/IO/MemoryReadWriteBuffer.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/dbms/src/IO/Progress.h b/dbms/src/IO/Progress.h index b15b2695e39..a3efb96db98 100644 --- a/dbms/src/IO/Progress.h +++ b/dbms/src/IO/Progress.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/dbms/src/IO/WriteIntText.h b/dbms/src/IO/WriteIntText.h index 4e18826600e..15276bba63f 100644 --- a/dbms/src/IO/WriteIntText.h +++ b/dbms/src/IO/WriteIntText.h @@ -3,7 +3,6 @@ #include #include #include -#include /// 40 digits or 
39 digits and a sign #define WRITE_HELPERS_MAX_INT_WIDTH 40U diff --git a/dbms/src/IO/readFloatText.h b/dbms/src/IO/readFloatText.h index 1b9da8db49c..fc3ffc43a91 100644 --- a/dbms/src/IO/readFloatText.h +++ b/dbms/src/IO/readFloatText.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/IO/tests/parse_int_perf.cpp b/dbms/src/IO/tests/parse_int_perf.cpp index 8e0185df239..150df9805f1 100644 --- a/dbms/src/IO/tests/parse_int_perf.cpp +++ b/dbms/src/IO/tests/parse_int_perf.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/src/Processors/Port.h b/dbms/src/Processors/Port.h index c5b3c5979d1..e200b8c1ecb 100644 --- a/dbms/src/Processors/Port.h +++ b/dbms/src/Processors/Port.h @@ -9,7 +9,6 @@ #include #include #include -#include namespace DB { diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 017b53036cf..6e5870a3560 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-stateless-test . +# docker build -t yandex/clickhouse-stateless-test . FROM yandex/clickhouse-deb-builder ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 3e598b10876..881c1cf3918 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -1,8 +1,7 @@ #include #include #include -#include -#include +#include #include #include diff --git a/utils/compressor/mutator.cpp b/utils/compressor/mutator.cpp index 65125d073d0..13c80c292e2 100644 --- a/utils/compressor/mutator.cpp +++ b/utils/compressor/mutator.cpp @@ -1,8 +1,7 @@ #include #include #include -#include -#include +#include #include #include diff --git a/utils/fill-factor/main.cpp b/utils/fill-factor/main.cpp index b492be1be85..305c33a91d4 100644 --- a/utils/fill-factor/main.cpp +++ b/utils/fill-factor/main.cpp @@ -5,7 +5,7 @@ #include #endif -#include +#include #include #include From 412c9b6de0954e67f3cff9cdf56fd3207f137151 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 19 Mar 2020 19:16:05 +0800 Subject: [PATCH 096/115] correct highlighting for completion prefix --- contrib/replxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/replxx b/contrib/replxx index 07cbfbec550..1d7e4262021 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 07cbfbec550133b88c91c4073fa5af2ae2ae6a9a +Subproject commit 1d7e426202139e872a4e64975a34177061cee4f1 From d27cd773cce4dbe3abdb2e56e91ac6ec631ee6f6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 14:31:21 +0300 Subject: [PATCH 097/115] Fix mutations huge memory consumption --- dbms/src/Interpreters/PartLog.cpp | 5 ++++ dbms/src/Interpreters/PartLog.h | 2 ++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 1 + .../MergeTree/StorageFromMergeTreeDataPart.h | 5 ++++ dbms/src/Storages/ReadInOrderOptimizer.cpp | 21 +++++++++++++--- ...200_mutations_memory_consumption.reference | 1 + .../01200_mutations_memory_consumption.sql | 25 +++++++++++++++++++ 7 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.reference create mode 100644 dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp 
index d77bb3fed59..b80d97ab36b 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -50,10 +50,13 @@ Block PartLogElement::createBlock() {ColumnUInt64::create(), std::make_shared(), "bytes_uncompressed"}, // Result bytes {ColumnUInt64::create(), std::make_shared(), "read_rows"}, {ColumnUInt64::create(), std::make_shared(), "read_bytes"}, + {ColumnUInt64::create(), std::make_shared(), "peak_memory_usage"}, /// Is there an error during the execution or commit {ColumnUInt16::create(), std::make_shared(), "error"}, {ColumnString::create(), std::make_shared(), "exception"}, + + }; } @@ -87,10 +90,12 @@ void PartLogElement::appendToBlock(Block & block) const columns[i++]->insert(bytes_uncompressed); columns[i++]->insert(rows_read); columns[i++]->insert(bytes_read_uncompressed); + columns[i++]->insert(peak_memory_usage); columns[i++]->insert(error); columns[i++]->insert(exception); + block.setColumns(std::move(columns)); } diff --git a/dbms/src/Interpreters/PartLog.h b/dbms/src/Interpreters/PartLog.h index 4c4930ccefa..b84138159a2 100644 --- a/dbms/src/Interpreters/PartLog.h +++ b/dbms/src/Interpreters/PartLog.h @@ -40,11 +40,13 @@ struct PartLogElement UInt64 bytes_uncompressed = 0; UInt64 rows_read = 0; UInt64 bytes_read_uncompressed = 0; + UInt64 peak_memory_usage = 0; /// Was the operation successful? UInt16 error = 0; String exception; + static std::string name() { return "PartLog"; } static Block createBlock(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f7e9cb80103..4967f0ff2ae 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3755,6 +3755,7 @@ try part_log_elem.rows = (*merge_entry)->rows_written; part_log_elem.bytes_uncompressed = (*merge_entry)->bytes_written_uncompressed; + part_log_elem.peak_memory_usage = (*merge_entry)->memory_tracker.getPeak(); } part_log->add(part_log_elem); diff --git a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index c44c744efaf..0b430439aae 100644 --- a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -53,6 +53,11 @@ public: } + bool hasSortingKey() const { return part->storage.hasSortingKey(); } + + Names getSortingKeyColumns() const override { return part->storage.getSortingKeyColumns(); } + + protected: StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) : IStorage(getIDFromPart(part_), part_->storage.getVirtuals()) diff --git a/dbms/src/Storages/ReadInOrderOptimizer.cpp b/dbms/src/Storages/ReadInOrderOptimizer.cpp index 667ce095932..753ff5de7a0 100644 --- a/dbms/src/Storages/ReadInOrderOptimizer.cpp +++ b/dbms/src/Storages/ReadInOrderOptimizer.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -31,14 +32,28 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( InputSortingInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const { - const MergeTreeData * merge_tree = dynamic_cast(storage.get()); - if (!merge_tree || !merge_tree->hasSortingKey()) + Names sorting_key_columns; + if (const auto * merge_tree = dynamic_cast(storage.get())) + { + if (!merge_tree->hasSortingKey()) + return {}; + sorting_key_columns = merge_tree->getSortingKeyColumns(); + } + else if (const auto * part = dynamic_cast(storage.get())) + { + if (!part->hasSortingKey()) + return {}; + sorting_key_columns = 
part->getSortingKeyColumns(); + } + else /// Inapplicable storage type + { return {}; + } + SortDescription order_key_prefix_descr; int read_direction = required_sort_description.at(0).direction; - const auto & sorting_key_columns = merge_tree->getSortingKeyColumns(); size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size()); for (size_t i = 0; i < prefix_size; ++i) diff --git a/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.reference b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql new file mode 100644 index 00000000000..1a3e414ae26 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS table_with_pk; + +CREATE TABLE table_with_pk +( + key UInt8, + value String +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO table_with_pk SELECT number, toString(number % 10) FROM numbers(10000000); + +ALTER TABLE table_with_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1; + +SYSTEM FLUSH LOGS; + +-- Memory usage for all mutations must be almost constant and less than +-- read_bytes. +SELECT + DISTINCT read_bytes >= peak_memory_usage +FROM + system.part_log2 +WHERE event_type = 'MutatePart' AND table = 'table_with_pk' AND database = currentDatabase(); + +DROP TABLE IF EXISTS table_with_pk; From fbad7cb23ba81dea62f8c9be89e3432faf97ce94 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 14:32:06 +0300 Subject: [PATCH 098/115] Remove new lines --- dbms/src/Interpreters/PartLog.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp index b80d97ab36b..c91d581449b 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -55,8 +55,6 @@ Block PartLogElement::createBlock() /// Is there an error during the execution or commit {ColumnUInt16::create(), std::make_shared(), "error"}, {ColumnString::create(), std::make_shared(), "exception"}, - - }; } From 2b41cb2f5cfd40256657fac06da97cb7a115d236 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 14:34:02 +0300 Subject: [PATCH 099/115] Rename part_log table --- .../queries/0_stateless/01200_mutations_memory_consumption.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 1a3e414ae26..3c9d14b58cb 100644 --- a/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -19,7 +19,7 @@ SYSTEM FLUSH LOGS; SELECT DISTINCT read_bytes >= peak_memory_usage FROM - system.part_log2 + system.part_log WHERE event_type = 'MutatePart' AND table = 'table_with_pk' AND database = currentDatabase(); DROP TABLE IF EXISTS table_with_pk; From 72baec852371278cd8c2e3231cb1d32fadd10f91 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 14:34:54 +0300 Subject: [PATCH 100/115] Better includes --- dbms/src/Storages/ReadInOrderOptimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dbms/src/Storages/ReadInOrderOptimizer.cpp b/dbms/src/Storages/ReadInOrderOptimizer.cpp index 753ff5de7a0..c05acfa71ab 100644 --- a/dbms/src/Storages/ReadInOrderOptimizer.cpp +++ b/dbms/src/Storages/ReadInOrderOptimizer.cpp @@ -1,8 +1,8 @@ #include #include +#include #include #include -#include namespace DB { From 5818dfeb458d26e47bc4e4bed2e19dbb50663166 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 14:45:04 +0300 Subject: [PATCH 101/115] Simplier string get --- .../getDictionaryConfigurationFromAST.cpp | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 2fa7a98cbd3..717f8c943f7 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -33,17 +33,11 @@ namespace using NamesToTypeNames = std::unordered_map; /// Get value from field and convert it to string. /// Also remove quotes from strings. -String getUnquotedFieldString(const Field & field) +String getFieldAsString(const Field & field) { - String string = applyVisitor(FieldVisitorToString(), field); - if (string.front() == '\'') - { - String result; - ReadBufferFromString buf(string); - readQuotedString(result, buf); - return result; - } - return string; + if (field.getType() == Field::Types::Which::String) + return field.get(); + return applyVisitor(FieldVisitorToString(), field); } @@ -190,7 +184,7 @@ void buildSingleAttribute( AutoPtr null_value_element(doc->createElement("null_value")); String null_value_str; if (dict_attr->default_value) - null_value_str = getUnquotedFieldString(dict_attr->default_value->as()->value); + null_value_str = getFieldAsString(dict_attr->default_value->as()->value); AutoPtr null_value(doc->createTextNode(null_value_str)); null_value_element->appendChild(null_value); attribute_element->appendChild(null_value_element); @@ -204,7 +198,7 @@ void buildSingleAttribute( if (const auto * literal = dict_attr->expression->as(); literal && literal->value.getType() == Field::Types::String) { - expression_str = getUnquotedFieldString(literal->value); + expression_str = getFieldAsString(literal->value); } else expression_str = queryToString(dict_attr->expression); @@ -353,7 +347,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments( } else if (auto literal = pair->second->as(); literal) { - AutoPtr value(doc->createTextNode(getUnquotedFieldString(literal->value))); + AutoPtr value(doc->createTextNode(getFieldAsString(literal->value))); current_xml_element->appendChild(value); } else if (auto list = pair->second->as(); list) From 5d763dead06fafb0ffe42f7e6b76dfa1232dd67e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 14:45:52 +0300 Subject: [PATCH 102/115] Change the way how rows_before_limit_at_least is calculated for processors pipeline. 
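
This changes rows_before_limit_at_least from a value computed after execution (QueryPipeline::finalize() walked the processors and summed per-processor counters) to a shared counter that is wired into the pipeline when the output format is attached. QueryPipeline::setOutput() now calls initRowsBeforeLimit(), which creates one shared RowsBeforeLimitCounter and attaches it to the PartialSortingTransforms in the pipeline, or, when there are none, to the LimitTransforms and SourceFromInputStreams; IOutputFormat reads the counter when it finalizes. As a result pipeline.finalize() is no longer needed and its calls are removed from TCPHandler and executeQuery.

The snippet below is a minimal standalone sketch of the shared-counter idea only, not the exact code added by this patch: it folds setAppliedLimit() into add(), uses relaxed memory ordering, and the usage in main() (the "streams" reporting rows) is hypothetical.

```cpp
// Sketch: several pipeline stages add the rows they have seen to one shared
// atomic counter; the output format reads it once when the query finishes.
#include <atomic>
#include <cstdint>
#include <iostream>
#include <memory>

class RowsBeforeLimitCounter
{
public:
    void add(uint64_t rows)
    {
        has_applied_limit.store(true, std::memory_order_relaxed);
        rows_before_limit.fetch_add(rows, std::memory_order_relaxed);
    }

    uint64_t get() const { return rows_before_limit.load(std::memory_order_relaxed); }
    bool hasAppliedLimit() const { return has_applied_limit.load(std::memory_order_relaxed); }

private:
    std::atomic<uint64_t> rows_before_limit{0};
    std::atomic<bool> has_applied_limit{false};
};

using RowsBeforeLimitCounterPtr = std::shared_ptr<RowsBeforeLimitCounter>;

int main()
{
    auto counter = std::make_shared<RowsBeforeLimitCounter>();

    /// Two hypothetical stages feeding a LIMIT report the rows they processed
    /// through the same shared counter (in the real pipeline these would be
    /// transforms or sources running on different threads).
    counter->add(1000);  /// first stream
    counter->add(500);   /// second stream

    /// The output format reads the accumulated value once, when the query is done.
    if (counter->hasAppliedLimit())
        std::cout << "rows_before_limit_at_least: " << counter->get() << '\n';
}
```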
--- dbms/programs/server/TCPHandler.cpp | 2 - .../Interpreters/InterpreterSelectQuery.cpp | 16 +++-- dbms/src/Interpreters/executeQuery.cpp | 2 - dbms/src/Processors/Formats/IOutputFormat.cpp | 3 + dbms/src/Processors/Formats/IOutputFormat.h | 6 ++ dbms/src/Processors/LimitTransform.cpp | 4 +- dbms/src/Processors/LimitTransform.h | 5 +- dbms/src/Processors/QueryPipeline.cpp | 66 ++++++++----------- dbms/src/Processors/QueryPipeline.h | 5 +- dbms/src/Processors/RowsBeforeLimitCounter.h | 28 ++++++++ .../Sources/SourceFromInputStream.cpp | 14 ++++ .../Sources/SourceFromInputStream.h | 5 ++ .../Transforms/PartialSortingTransform.cpp | 8 +-- .../Transforms/PartialSortingTransform.h | 15 ++--- 14 files changed, 111 insertions(+), 68 deletions(-) create mode 100644 dbms/src/Processors/RowsBeforeLimitCounter.h diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index cb2df69d5d1..40ed50e45a2 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -647,8 +647,6 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads) */ if (!isQueryCancelled()) { - pipeline.finalize(); - sendTotals(lazy_format->getTotals()); sendExtremes(lazy_format->getExtremes()); sendProfileInfo(lazy_format->getProfileInfo()); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 3adc2eadbfc..085ebe52557 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -2046,10 +2046,12 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting if (need_finish_sorting) { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) + pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr { - bool do_count_rows = stream_type == QueryPipeline::StreamType::Main; - return std::make_shared(header, output_order_descr, limit, do_count_rows); + if (stream_type != QueryPipeline::StreamType::Main) + return nullptr; + + return std::make_shared(header, output_order_descr, limit); }); pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr @@ -2063,10 +2065,12 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting return; } - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) + pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr { - bool do_count_rows = stream_type == QueryPipeline::StreamType::Main; - return std::make_shared(header, output_order_descr, limit, do_count_rows); + if (stream_type != QueryPipeline::StreamType::Main) + return nullptr; + + return std::make_shared(header, output_order_descr, limit); }); /// Merge the sorted blocks. diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index cfd588078ff..fefca6b580f 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -748,8 +748,6 @@ void executeQuery( auto executor = pipeline.execute(); executor->execute(context.getSettingsRef().max_threads); } - - pipeline.finalize(); } } catch (...) 
diff --git a/dbms/src/Processors/Formats/IOutputFormat.cpp b/dbms/src/Processors/Formats/IOutputFormat.cpp index 971ad95d946..ff4ac393471 100644 --- a/dbms/src/Processors/Formats/IOutputFormat.cpp +++ b/dbms/src/Processors/Formats/IOutputFormat.cpp @@ -48,6 +48,9 @@ void IOutputFormat::work() { if (finished && !finalized) { + if (rows_before_limit_counter && rows_before_limit_counter->hasAppliedLimit()) + setRowsBeforeLimit(rows_before_limit_counter->get()); + finalize(); finalized = true; return; diff --git a/dbms/src/Processors/Formats/IOutputFormat.h b/dbms/src/Processors/Formats/IOutputFormat.h index 5200b897643..1137dd78446 100644 --- a/dbms/src/Processors/Formats/IOutputFormat.h +++ b/dbms/src/Processors/Formats/IOutputFormat.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -33,6 +34,8 @@ protected: bool finished = false; bool finalized = false; + RowsBeforeLimitCounterPtr rows_before_limit_counter; + virtual void consume(Chunk) = 0; virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} @@ -50,6 +53,9 @@ public: /// Value for rows_before_limit_at_least field. virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {} + /// Counter to calculate rows_before_limit_at_least in processors pipeline. + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_counter.swap(counter); } + /// Notify about progress. Method could be called from different threads. /// Passed value are delta, that must be summarized. virtual void onProgress(const Progress & /*progress*/) {} diff --git a/dbms/src/Processors/LimitTransform.cpp b/dbms/src/Processors/LimitTransform.cpp index 219a663198f..fe8990f7b0f 100644 --- a/dbms/src/Processors/LimitTransform.cpp +++ b/dbms/src/Processors/LimitTransform.cpp @@ -185,7 +185,9 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) data.current_chunk = input.pull(true); auto rows = data.current_chunk.getNumRows(); - rows_before_limit_at_least += rows; + + if (rows_before_limit_at_least) + rows_before_limit_at_least->add(rows); /// Skip block (for 'always_read_till_end' case). if (is_limit_reached || output_finished) diff --git a/dbms/src/Processors/LimitTransform.h b/dbms/src/Processors/LimitTransform.h index 1e7ec3bf322..a6989483c00 100644 --- a/dbms/src/Processors/LimitTransform.h +++ b/dbms/src/Processors/LimitTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB @@ -29,7 +30,7 @@ private: std::vector sort_column_positions; size_t rows_read = 0; /// including the last read block - size_t rows_before_limit_at_least = 0; + RowsBeforeLimitCounterPtr rows_before_limit_at_least; /// State of port's pair. /// Chunks from different port pairs are not mixed for berret cache locality. 
@@ -65,7 +66,7 @@ public: InputPort & getInputPort() { return inputs.front(); } OutputPort & getOutputPort() { return outputs.front(); } - size_t getRowsBeforeLimitAtLeast() const { return rows_before_limit_at_least; } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); } }; } diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp index 8398c2359ac..6b2ee284c2a 100644 --- a/dbms/src/Processors/QueryPipeline.cpp +++ b/dbms/src/Processors/QueryPipeline.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -438,6 +439,8 @@ void QueryPipeline::setOutput(ProcessorPtr output) connect(*streams.front(), main); connect(*totals_having_port, totals); connect(*extremes_port, extremes); + + initRowsBeforeLimit(); } void QueryPipeline::unitePipelines( @@ -552,25 +555,12 @@ void QueryPipeline::setProcessListElement(QueryStatus * elem) } } -void QueryPipeline::finalize() +void QueryPipeline::initRowsBeforeLimit() { - checkInitialized(); + RowsBeforeLimitCounterPtr rows_before_limit_at_least; - if (!output_format) - throw Exception("Cannot finalize pipeline because it doesn't have output.", ErrorCodes::LOGICAL_ERROR); - - calcRowsBeforeLimit(); -} - -void QueryPipeline::calcRowsBeforeLimit() -{ - /// TODO get from Remote - - UInt64 rows_before_limit_at_least = 0; - UInt64 rows_before_limit = 0; - - bool has_limit = false; - bool has_partial_sorting = false; + std::vector limits; + std::vector sources; std::unordered_set visited; @@ -593,30 +583,22 @@ void QueryPipeline::calcRowsBeforeLimit() if (!visited_limit) { - if (auto * limit = typeid_cast(processor)) + if (auto * limit = typeid_cast(processor)) { - has_limit = visited_limit = true; - rows_before_limit_at_least += limit->getRowsBeforeLimitAtLeast(); + visited_limit = true; + limits.emplace_back(limit); } if (auto * source = typeid_cast(processor)) - { - if (auto & stream = source->getStream()) - { - auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - { - has_limit = visited_limit = true; - rows_before_limit_at_least += info.getRowsBeforeLimit(); - } - } - } + sources.emplace_back(source); } - if (auto * sorting = typeid_cast(processor)) + if (auto * sorting = typeid_cast(processor)) { - has_partial_sorting = true; - rows_before_limit += sorting->getNumReadRows(); + if (!rows_before_limit_at_least) + rows_before_limit_at_least = std::make_shared(); + + sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least); /// Don't go to children. Take rows_before_limit from last PartialSortingTransform. /// continue; @@ -640,9 +622,19 @@ void QueryPipeline::calcRowsBeforeLimit() } } - /// Get num read rows from PartialSortingTransform if have it. - if (has_limit) - output_format->setRowsBeforeLimit(has_partial_sorting ? 
rows_before_limit : rows_before_limit_at_least); + if (!rows_before_limit_at_least && (!limits.empty() && !sources.empty())) + { + rows_before_limit_at_least = std::make_shared(); + + for (auto & limit : limits) + limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + } + + if (rows_before_limit_at_least) + output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); } Pipe QueryPipeline::getPipe() && diff --git a/dbms/src/Processors/QueryPipeline.h b/dbms/src/Processors/QueryPipeline.h index be90e07f281..e01087b717a 100644 --- a/dbms/src/Processors/QueryPipeline.h +++ b/dbms/src/Processors/QueryPipeline.h @@ -140,9 +140,6 @@ public: void setProgressCallback(const ProgressCallback & callback); void setProcessListElement(QueryStatus * elem); - /// Call after execution. - void finalize(); - /// Recommend number of threads for pipeline execution. size_t getNumThreads() const { @@ -200,7 +197,7 @@ private: template void addSimpleTransformImpl(const TProcessorGetter & getter); - void calcRowsBeforeLimit(); + void initRowsBeforeLimit(); }; } diff --git a/dbms/src/Processors/RowsBeforeLimitCounter.h b/dbms/src/Processors/RowsBeforeLimitCounter.h new file mode 100644 index 00000000000..1408010cec7 --- /dev/null +++ b/dbms/src/Processors/RowsBeforeLimitCounter.h @@ -0,0 +1,28 @@ +#include +#include + +namespace DB +{ + +class RowsBeforeLimitCounter +{ +public: + void add(uint64_t rows) + { + setAppliedLimit(); + rows_before_limit.fetch_add(rows, std::memory_order_release); + } + + uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } + + bool setAppliedLimit() { has_applied_limit.store(true, std::memory_order::release); } + bool hasAppliedLimit() const { return has_applied_limit.load(std::memory_order_acquire); } + +private: + std::atomic rows_before_limit = 0; + std::atomic_bool has_applied_limit = false; +}; + +using RowsBeforeLimitCounterPtr = std::shared_ptr; + +} diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.cpp b/dbms/src/Processors/Sources/SourceFromInputStream.cpp index 3cc050aaf95..6f2a7eeb28a 100644 --- a/dbms/src/Processors/Sources/SourceFromInputStream.cpp +++ b/dbms/src/Processors/Sources/SourceFromInputStream.cpp @@ -95,6 +95,13 @@ void SourceFromInputStream::work() if (!typeid_cast(stream.get())) stream->cancel(false); + if (rows_before_limit) + { + auto & info = stream->getProfileInfo(); + if (info.hasAppliedLimit()) + rows_before_limit->add(info.getRowsBeforeLimit()); + } + stream->readSuffix(); if (auto totals_block = stream->getTotals()) @@ -120,6 +127,13 @@ Chunk SourceFromInputStream::generate() auto block = stream->read(); if (!block && !isCancelled()) { + if (rows_before_limit) + { + auto & info = stream->getProfileInfo(); + if (info.hasAppliedLimit()) + rows_before_limit->add(info.getRowsBeforeLimit()); + } + stream->readSuffix(); if (auto totals_block = stream->getTotals()) diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.h b/dbms/src/Processors/Sources/SourceFromInputStream.h index 00b03220cec..83e7f9929c9 100644 --- a/dbms/src/Processors/Sources/SourceFromInputStream.h +++ b/dbms/src/Processors/Sources/SourceFromInputStream.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -23,6 +24,8 @@ public: void addTotalsPort(); + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } + /// Implementation for methods from ISourceWithProgress. 
void setLimits(const LocalLimits & limits_) final { stream->setLimits(limits_); } void setQuota(const QuotaContextPtr & quota_) final { stream->setQuota(quota_); } @@ -38,6 +41,8 @@ private: bool force_add_aggregating_info = false; BlockInputStreamPtr stream; + RowsBeforeLimitCounterPtr rows_before_limit; + Chunk totals; bool has_totals_port = false; bool has_totals = false; diff --git a/dbms/src/Processors/Transforms/PartialSortingTransform.cpp b/dbms/src/Processors/Transforms/PartialSortingTransform.cpp index 7e217ea0a07..018614f0165 100644 --- a/dbms/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/dbms/src/Processors/Transforms/PartialSortingTransform.cpp @@ -5,16 +5,16 @@ namespace DB { PartialSortingTransform::PartialSortingTransform( - const Block & header_, SortDescription & description_, UInt64 limit_, bool do_count_rows_) + const Block & header_, SortDescription & description_, UInt64 limit_) : ISimpleTransform(header_, header_, false) - , description(description_), limit(limit_), do_count_rows(do_count_rows_) + , description(description_), limit(limit_) { } void PartialSortingTransform::transform(Chunk & chunk) { - if (do_count_rows) - read_rows += chunk.getNumRows(); + if (read_rows) + read_rows->add(chunk.getNumRows()); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); chunk.clear(); diff --git a/dbms/src/Processors/Transforms/PartialSortingTransform.h b/dbms/src/Processors/Transforms/PartialSortingTransform.h index 905b294c0be..158b0af202e 100644 --- a/dbms/src/Processors/Transforms/PartialSortingTransform.h +++ b/dbms/src/Processors/Transforms/PartialSortingTransform.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include namespace DB @@ -12,17 +13,15 @@ class PartialSortingTransform : public ISimpleTransform { public: /// limit - if not 0, then you can sort each block not completely, but only `limit` first rows by order. - /// When count_rows is false, getNumReadRows() will always return 0. + /// When count_rows is false, read_rows is not changed. It is needed PartialSortingTransform( const Block & header_, SortDescription & description_, - UInt64 limit_ = 0, - bool do_count_rows_ = true); + UInt64 limit_ = 0); String getName() const override { return "PartialSortingTransform"; } - /// Total num rows passed to transform. - UInt64 getNumReadRows() const { return read_rows; } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { read_rows.swap(counter); } protected: void transform(Chunk & chunk) override; @@ -30,11 +29,7 @@ protected: private: SortDescription description; UInt64 limit; - UInt64 read_rows = 0; - - /// Do we need calculate read_rows value? - /// Used to skip total row when count rows_before_limit_at_least. 
- bool do_count_rows; + RowsBeforeLimitCounterPtr read_rows; }; } From 9982f548f53046581359c44d5863b4bef374aae2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Mar 2020 14:48:11 +0300 Subject: [PATCH 103/115] Remove redundant header --- dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 717f8c943f7..7b5d7e9975d 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -1,7 +1,5 @@ #include -#include -#include #include #include #include From 80828b969bb726c15b0e820d64715f1319120d49 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 19 Mar 2020 14:51:22 +0300 Subject: [PATCH 104/115] Minor improvements in docs build and content (#9752) --- .../performance/sampling_query_profiler.md | 557 +----------------- ...sampling_query_profiler_example_result.txt | 556 +++++++++++++++++ .../table_engines/aggregatingmergetree.md | 5 +- .../table_engines/collapsingmergetree.md | 18 +- .../table_engines/custom_partitioning_key.md | 2 +- .../en/operations/table_engines/dictionary.md | 1 - .../operations/table_engines/distributed.md | 15 +- .../operations/table_engines/external_data.md | 1 - ...sampling_query_profiler_example_result.txt | 1 + ...sampling_query_profiler_example_result.txt | 1 + ...sampling_query_profiler_example_result.txt | 1 + docs/tools/build.py | 4 +- .../partials/social.html | 2 +- docs/tools/test.py | 6 +- ...sampling_query_profiler_example_result.txt | 1 + website/templates/index/community.html | 2 +- 16 files changed, 586 insertions(+), 587 deletions(-) create mode 100644 docs/en/operations/performance/sampling_query_profiler_example_result.txt create mode 120000 docs/fa/operations/performance/sampling_query_profiler_example_result.txt create mode 120000 docs/ja/operations/performance/sampling_query_profiler_example_result.txt create mode 120000 docs/ru/operations/performance/sampling_query_profiler_example_result.txt create mode 120000 docs/zh/operations/performance/sampling_query_profiler_example_result.txt diff --git a/docs/en/operations/performance/sampling_query_profiler.md b/docs/en/operations/performance/sampling_query_profiler.md index ddc7b848fd4..c2f2d92af0e 100644 --- a/docs/en/operations/performance/sampling_query_profiler.md +++ b/docs/en/operations/performance/sampling_query_profiler.md @@ -48,560 +48,5 @@ ORDER BY count() DESC LIMIT 10 ``` ```text -Row 1: -────── -count(): 6344 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -read - -DB::ReadBufferFromFileDescriptor::nextImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBufferFromFileDescriptor.cpp:56 -DB::CompressedReadBufferBase::readCompressedData(unsigned long&, unsigned long&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBuffer.h:54 -DB::CompressedReadBufferFromFile::nextImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Compression/CompressedReadBufferFromFile.cpp:22 -DB::CompressedReadBufferFromFile::seek(unsigned long, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Compression/CompressedReadBufferFromFile.cpp:63 
-DB::MergeTreeReaderStream::seekToMark(unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp:200 -std::_Function_handler > const&), DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool)::{lambda(bool)#1}::operator()(bool) const::{lambda(std::vector > const&)#1}>::_M_invoke(std::_Any_data const&, std::vector > const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:212 -DB::IDataType::deserializeBinaryBulkWithMultipleStreams(DB::IColumn&, unsigned long, DB::IDataType::DeserializeBinaryBulkSettings&, std::shared_ptr&) const - /usr/local/include/c++/9.1.0/bits/std_function.h:690 -DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 -DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 -DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 -DB::MergeTreeRangeReader::continueReadingChain(DB::MergeTreeRangeReader::ReadResult&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:487 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 2: -────── -count(): 3295 -sym: StackTrace::StackTrace(ucontext_t 
const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -__pthread_cond_wait - -std::condition_variable::wait(std::unique_lock&) - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/src/c++11/../../../../../gcc-9.1.0/libstdc++-v3/src/c++11/condition_variable.cc:55 -Poco::Semaphore::wait() - /home/milovidov/ClickHouse/build_gcc9/../contrib/poco/Foundation/src/Semaphore.cpp:61 -DB::UnionBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/x86_64-pc-linux-gnu/bits/gthr-default.h:748 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Core/Block.h:90 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::LimitBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::AsynchronousBlockInputStream::calculate() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -std::_Function_handler::_M_invoke(std::_Any_data const&) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:551 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/x86_64-pc-linux-gnu/bits/gthr-default.h:748 -ThreadFromGlobalPool::ThreadFromGlobalPool::scheduleImpl(std::function, int, std::optional)::{lambda()#3}>(ThreadPoolImpl::scheduleImpl(std::function, int, std::optional)::{lambda()#3}&&)::{lambda()#1}::operator()() const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/ThreadPool.h:146 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 3: -────── -count(): 1978 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 16ul>&) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 -DB::ExpressionAction::execute(DB::Block&, bool) const - /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 -DB::ExpressionActions::execute(DB::Block&, 
bool) const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 -DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 4: -────── -count(): 1913 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 16ul>&) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 -DB::ExpressionAction::execute(DB::Block&, bool) const - /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 
-DB::ExpressionActions::execute(DB::Block&, bool) const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 -DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 5: -────── -count(): 1672 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 16ul>&) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 -DB::ExpressionAction::execute(DB::Block&, bool) const - 
/usr/local/include/c++/9.1.0/bits/stl_vector.h:677 -DB::ExpressionActions::execute(DB::Block&, bool) const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 -DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 6: -────── -count(): 1531 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -read - -DB::ReadBufferFromFileDescriptor::nextImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBufferFromFileDescriptor.cpp:56 -DB::CompressedReadBufferBase::readCompressedData(unsigned long&, unsigned long&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBuffer.h:54 -DB::CompressedReadBufferFromFile::nextImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Compression/CompressedReadBufferFromFile.cpp:22 -void DB::deserializeBinarySSE2<4>(DB::PODArray, 15ul, 16ul>&, DB::PODArray, 15ul, 16ul>&, DB::ReadBuffer&, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBuffer.h:53 -DB::DataTypeString::deserializeBinaryBulk(DB::IColumn&, DB::ReadBuffer&, unsigned long, double) const - 
/home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataTypes/DataTypeString.cpp:202 -DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 -DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 -DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 -DB::MergeTreeRangeReader::startReadingChain(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 7: -────── -count(): 1034 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 16ul>&) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse 
-DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) - /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse -DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 -DB::ExpressionAction::execute(DB::Block&, bool) const - /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 -DB::ExpressionActions::execute(DB::Block&, bool) const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 -DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 8: -────── -count(): 989 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -__lll_lock_wait - -pthread_mutex_lock - -DB::MergeTreeReaderStream::loadMarks() - /usr/local/include/c++/9.1.0/bits/std_mutex.h:103 -DB::MergeTreeReaderStream::MergeTreeReaderStream(std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > const&, unsigned long, std::vector > const&, DB::MarkCache*, bool, DB::UncompressedCache*, 
unsigned long, unsigned long, unsigned long, DB::MergeTreeIndexGranularityInfo const*, std::function const&, int) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp:107 -std::_Function_handler > const&), DB::MergeTreeReader::addStreams(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, std::function const&, int)::{lambda(std::vector > const&)#1}>::_M_invoke(std::_Any_data const&, std::vector > const&) - /usr/local/include/c++/9.1.0/bits/unique_ptr.h:147 -DB::MergeTreeReader::addStreams(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, std::function const&, int) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 -DB::MergeTreeReader::MergeTreeReader(std::__cxx11::basic_string, std::allocator > const&, std::shared_ptr const&, DB::NamesAndTypesList const&, DB::UncompressedCache*, DB::MarkCache*, bool, DB::MergeTreeData const&, std::vector > const&, unsigned long, unsigned long, std::map, std::allocator >, double, std::less, std::allocator > >, std::allocator, std::allocator > const, double> > > const&, std::function const&, int) - /usr/local/include/c++/9.1.0/bits/stl_list.h:303 -DB::MergeTreeThreadSelectBlockInputStream::getNewTask() - /usr/local/include/c++/9.1.0/bits/std_function.h:259 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:54 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 9: -─────── -count(): 779 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -void DB::deserializeBinarySSE2<4>(DB::PODArray, 15ul, 16ul>&, DB::PODArray, 15ul, 16ul>&, DB::ReadBuffer&, unsigned long) - /usr/local/lib/gcc/x86_64-pc-linux-gnu/9.1.0/include/emmintrin.h:727 -DB::DataTypeString::deserializeBinaryBulk(DB::IColumn&, DB::ReadBuffer&, unsigned long, double) 
const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataTypes/DataTypeString.cpp:202 -DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 -DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 -DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 -DB::MergeTreeRangeReader::startReadingChain(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone - - -Row 10: -─────── -count(): 666 -sym: StackTrace::StackTrace(ucontext_t const&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 -DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 - - -void DB::deserializeBinarySSE2<4>(DB::PODArray, 15ul, 16ul>&, DB::PODArray, 15ul, 16ul>&, DB::ReadBuffer&, unsigned long) - /usr/local/lib/gcc/x86_64-pc-linux-gnu/9.1.0/include/emmintrin.h:727 -DB::DataTypeString::deserializeBinaryBulk(DB::IColumn&, DB::ReadBuffer&, unsigned long, double) const - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataTypes/DataTypeString.cpp:202 
-DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 -DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 -DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 -DB::MergeTreeRangeReader::startReadingChain(unsigned long, std::vector >&) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 -DB::MergeTreeBaseSelectBlockInputStream::readImpl() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ExpressionBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::PartialSortingBlockInputStream::readImpl() - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 -DB::IBlockInputStream::read() - /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 -DB::ParallelInputsProcessor::loop(unsigned long) - /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 -DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) - /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 -ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const - /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 -ThreadPoolImpl::worker(std::_List_iterator) - /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 -execute_native_thread_routine - /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 -start_thread - -__clone +{% include "operations/performance/sampling_query_profiler_example_result.txt" %} ``` diff --git a/docs/en/operations/performance/sampling_query_profiler_example_result.txt b/docs/en/operations/performance/sampling_query_profiler_example_result.txt new file mode 100644 index 00000000000..8e4e0e0fd70 --- /dev/null +++ b/docs/en/operations/performance/sampling_query_profiler_example_result.txt @@ -0,0 +1,556 @@ +Row 1: +────── +count(): 6344 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +read + +DB::ReadBufferFromFileDescriptor::nextImpl() + 
/home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBufferFromFileDescriptor.cpp:56 +DB::CompressedReadBufferBase::readCompressedData(unsigned long&, unsigned long&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBuffer.h:54 +DB::CompressedReadBufferFromFile::nextImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Compression/CompressedReadBufferFromFile.cpp:22 +DB::CompressedReadBufferFromFile::seek(unsigned long, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Compression/CompressedReadBufferFromFile.cpp:63 +DB::MergeTreeReaderStream::seekToMark(unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp:200 +std::_Function_handler > const&), DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool)::{lambda(bool)#1}::operator()(bool) const::{lambda(std::vector > const&)#1}>::_M_invoke(std::_Any_data const&, std::vector > const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:212 +DB::IDataType::deserializeBinaryBulkWithMultipleStreams(DB::IColumn&, unsigned long, DB::IDataType::DeserializeBinaryBulkSettings&, std::shared_ptr&) const + /usr/local/include/c++/9.1.0/bits/std_function.h:690 +DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 +DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 +DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 +DB::MergeTreeRangeReader::continueReadingChain(DB::MergeTreeRangeReader::ReadResult&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:487 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void 
(DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 2: +────── +count(): 3295 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +__pthread_cond_wait + +std::condition_variable::wait(std::unique_lock&) + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/src/c++11/../../../../../gcc-9.1.0/libstdc++-v3/src/c++11/condition_variable.cc:55 +Poco::Semaphore::wait() + /home/milovidov/ClickHouse/build_gcc9/../contrib/poco/Foundation/src/Semaphore.cpp:61 +DB::UnionBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/x86_64-pc-linux-gnu/bits/gthr-default.h:748 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Core/Block.h:90 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::LimitBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::AsynchronousBlockInputStream::calculate() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +std::_Function_handler::_M_invoke(std::_Any_data const&) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:551 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/x86_64-pc-linux-gnu/bits/gthr-default.h:748 +ThreadFromGlobalPool::ThreadFromGlobalPool::scheduleImpl(std::function, int, std::optional)::{lambda()#3}>(ThreadPoolImpl::scheduleImpl(std::function, int, std::optional)::{lambda()#3}&&)::{lambda()#1}::operator()() const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/ThreadPool.h:146 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 3: +────── +count(): 1978 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 16ul>&) + 
/opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 +DB::ExpressionAction::execute(DB::Block&, bool) const + /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 +DB::ExpressionActions::execute(DB::Block&, bool) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 +DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 4: +────── +count(): 1913 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 
16ul>&) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 +DB::ExpressionAction::execute(DB::Block&, bool) const + /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 +DB::ExpressionActions::execute(DB::Block&, bool) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 +DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 5: +────── +count(): 1672 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, 
DB::PODArray, 15ul, 16ul>&) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 +DB::ExpressionAction::execute(DB::Block&, bool) const + /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 +DB::ExpressionActions::execute(DB::Block&, bool) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 +DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 6: +────── +count(): 1531 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +read + +DB::ReadBufferFromFileDescriptor::nextImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBufferFromFileDescriptor.cpp:56 +DB::CompressedReadBufferBase::readCompressedData(unsigned long&, unsigned long&) + 
/home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBuffer.h:54 +DB::CompressedReadBufferFromFile::nextImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Compression/CompressedReadBufferFromFile.cpp:22 +void DB::deserializeBinarySSE2<4>(DB::PODArray, 15ul, 16ul>&, DB::PODArray, 15ul, 16ul>&, DB::ReadBuffer&, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/ReadBuffer.h:53 +DB::DataTypeString::deserializeBinaryBulk(DB::IColumn&, DB::ReadBuffer&, unsigned long, double) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataTypes/DataTypeString.cpp:202 +DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 +DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 +DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 +DB::MergeTreeRangeReader::startReadingChain(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 7: +────── +count(): 1034 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous 
namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +DB::VolnitskyBase >::search(unsigned char const*, unsigned long) const + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::MatchImpl::vector_constant(DB::PODArray, 15ul, 16ul> const&, DB::PODArray, 15ul, 16ul> const&, std::__cxx11::basic_string, std::allocator > const&, DB::PODArray, 15ul, 16ul>&) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::FunctionsStringSearch, DB::NameLike>::executeImpl(DB::Block&, std::vector > const&, unsigned long, unsigned long) + /opt/milovidov/ClickHouse/build_gcc9/dbms/programs/clickhouse +DB::PreparedFunctionImpl::execute(DB::Block&, std::vector > const&, unsigned long, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Functions/IFunction.cpp:464 +DB::ExpressionAction::execute(DB::Block&, bool) const + /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 +DB::ExpressionActions::execute(DB::Block&, bool) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Interpreters/ExpressionActions.cpp:739 +DB::MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(DB::MergeTreeRangeReader::ReadResult&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:660 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:546 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 8: +────── +count(): 989 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 
+DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +__lll_lock_wait + +pthread_mutex_lock + +DB::MergeTreeReaderStream::loadMarks() + /usr/local/include/c++/9.1.0/bits/std_mutex.h:103 +DB::MergeTreeReaderStream::MergeTreeReaderStream(std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > const&, unsigned long, std::vector > const&, DB::MarkCache*, bool, DB::UncompressedCache*, unsigned long, unsigned long, unsigned long, DB::MergeTreeIndexGranularityInfo const*, std::function const&, int) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp:107 +std::_Function_handler > const&), DB::MergeTreeReader::addStreams(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, std::function const&, int)::{lambda(std::vector > const&)#1}>::_M_invoke(std::_Any_data const&, std::vector > const&) + /usr/local/include/c++/9.1.0/bits/unique_ptr.h:147 +DB::MergeTreeReader::addStreams(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, std::function const&, int) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:677 +DB::MergeTreeReader::MergeTreeReader(std::__cxx11::basic_string, std::allocator > const&, std::shared_ptr const&, DB::NamesAndTypesList const&, DB::UncompressedCache*, DB::MarkCache*, bool, DB::MergeTreeData const&, std::vector > const&, unsigned long, unsigned long, std::map, std::allocator >, double, std::less, std::allocator > >, std::allocator, std::allocator > const, double> > > const&, std::function const&, int) + /usr/local/include/c++/9.1.0/bits/stl_list.h:303 +DB::MergeTreeThreadSelectBlockInputStream::getNewTask() + /usr/local/include/c++/9.1.0/bits/std_function.h:259 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:54 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 9: +─────── +count(): 779 +sym: StackTrace::StackTrace(ucontext_t 
const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 +DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +void DB::deserializeBinarySSE2<4>(DB::PODArray, 15ul, 16ul>&, DB::PODArray, 15ul, 16ul>&, DB::ReadBuffer&, unsigned long) + /usr/local/lib/gcc/x86_64-pc-linux-gnu/9.1.0/include/emmintrin.h:727 +DB::DataTypeString::deserializeBinaryBulk(DB::IColumn&, DB::ReadBuffer&, unsigned long, double) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataTypes/DataTypeString.cpp:202 +DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 +DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 +DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 +DB::MergeTreeRangeReader::startReadingChain(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone + + +Row 10: +─────── +count(): 666 +sym: StackTrace::StackTrace(ucontext_t const&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Common/StackTrace.cpp:208 
+DB::(anonymous namespace)::writeTraceInfo(DB::TimerType, int, siginfo_t*, void*) [clone .isra.0] + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/IO/BufferBase.h:99 + + +void DB::deserializeBinarySSE2<4>(DB::PODArray, 15ul, 16ul>&, DB::PODArray, 15ul, 16ul>&, DB::ReadBuffer&, unsigned long) + /usr/local/lib/gcc/x86_64-pc-linux-gnu/9.1.0/include/emmintrin.h:727 +DB::DataTypeString::deserializeBinaryBulk(DB::IColumn&, DB::ReadBuffer&, unsigned long, double) const + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataTypes/DataTypeString.cpp:202 +DB::MergeTreeReader::readData(std::__cxx11::basic_string, std::allocator > const&, DB::IDataType const&, DB::IColumn&, unsigned long, bool, unsigned long, bool) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:232 +DB::MergeTreeReader::readRows(unsigned long, bool, unsigned long, DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeReader.cpp:111 +DB::MergeTreeRangeReader::DelayedStream::finalize(DB::Block&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:35 +DB::MergeTreeRangeReader::startReadingChain(unsigned long, std::vector >&) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp:219 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeRangeReader::read(unsigned long, std::vector >&) + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::MergeTreeBaseSelectBlockInputStream::readFromPartImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp:158 +DB::MergeTreeBaseSelectBlockInputStream::readImpl() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ExpressionBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ExpressionBlockInputStream.cpp:34 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::PartialSortingBlockInputStream::readImpl() + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/PartialSortingBlockInputStream.cpp:13 +DB::IBlockInputStream::read() + /usr/local/include/c++/9.1.0/bits/stl_vector.h:108 +DB::ParallelInputsProcessor::loop(unsigned long) + /usr/local/include/c++/9.1.0/bits/atomic_base.h:419 +DB::ParallelInputsProcessor::thread(std::shared_ptr, unsigned long) + /home/milovidov/ClickHouse/build_gcc9/../dbms/src/DataStreams/ParallelInputsProcessor.h:215 +ThreadFromGlobalPool::ThreadFromGlobalPool::*)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*, std::shared_ptr, unsigned long&>(void (DB::ParallelInputsProcessor::*&&)(std::shared_ptr, unsigned long), DB::ParallelInputsProcessor*&&, std::shared_ptr&&, unsigned long&)::{lambda()#1}::operator()() const + /usr/local/include/c++/9.1.0/bits/shared_ptr_base.h:729 +ThreadPoolImpl::worker(std::_List_iterator) + /usr/local/include/c++/9.1.0/bits/unique_lock.h:69 +execute_native_thread_routine + /home/milovidov/ClickHouse/ci/workspace/gcc/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:81 +start_thread + +__clone diff --git a/docs/en/operations/table_engines/aggregatingmergetree.md b/docs/en/operations/table_engines/aggregatingmergetree.md index f45ded4067c..26d04d072ae 100644 --- a/docs/en/operations/table_engines/aggregatingmergetree.md +++ 
b/docs/en/operations/table_engines/aggregatingmergetree.md @@ -1,4 +1,3 @@ - # AggregatingMergeTree The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions. @@ -53,7 +52,7 @@ All of the parameters have the same meaning as in `MergeTree`. To insert data, use [INSERT SELECT](../../query_language/insert_into.md) query with aggregate -State- functions. When selecting data from `AggregatingMergeTree` table, use `GROUP BY` clause and the same aggregate functions as when inserting data, but using `-Merge` suffix. -In the results of `SELECT` query the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query. +In the results of `SELECT` query, the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query. ## Example of an Aggregated Materialized View @@ -71,7 +70,7 @@ FROM test.visits GROUP BY CounterID, StartDate; ``` -Inserting of data into the `test.visits` table. +Inserting data into the `test.visits` table. ```sql INSERT INTO test.visits ... diff --git a/docs/en/operations/table_engines/collapsingmergetree.md b/docs/en/operations/table_engines/collapsingmergetree.md index 9c50dd959ed..4dc0c3a6df6 100644 --- a/docs/en/operations/table_engines/collapsingmergetree.md +++ b/docs/en/operations/table_engines/collapsingmergetree.md @@ -4,7 +4,7 @@ The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows co `CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document. -The engine may significantly reduce the volume of storage and increase efficiency of `SELECT` query as a consequence. +The engine may significantly reduce the volume of storage and increase the efficiency of `SELECT` query as a consequence. ## Creating a Table @@ -63,7 +63,7 @@ Consider the situation where you need to save continually changing data for some Use the particular column `Sign`. If `Sign = 1` it means that the row is a state of an object, let's call it "state" row. If `Sign = -1` it means the cancellation of the state of an object with the same attributes, let's call it "cancel" row. -For example, we want to calculate how much pages users checked at some site and how long they were there. At some moment of time we write the following row with the state of user activity: +For example, we want to calculate how much pages users checked at some site and how long they were there. 
At some moment we write the following row with the state of user activity: ```text ┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐ @@ -80,7 +80,7 @@ At some moment later we register the change of user activity and write it with t └─────────────────────┴───────────┴──────────┴──────┘ ``` -The first row cancels the previous state of the object (user). It should copy the sorting key fields of the canceled state excepting `Sign`. +The first row cancels the previous state of the object (user). It should copy the sorting key fields of the cancelled state excepting `Sign`. The second row contains the current state. @@ -100,7 +100,7 @@ Why we need 2 rows for each change read in the [Algorithm](#table_engine-collaps **Peculiar properties of such approach** 1. The program that writes the data should remember the state of an object to be able to cancel it. "Cancel" string should contain copies of the sorting key fields of the "state" string and the opposite `Sign`. It increases the initial size of storage but allows to write the data quickly. -2. Long growing arrays in columns reduce the efficiency of the engine due to load for writing. The more straightforward data, the higher efficiency. +2. Long growing arrays in columns reduce the efficiency of the engine due to load for writing. The more straightforward data, the higher the efficiency. 3. The `SELECT` results depend strongly on the consistency of object changes history. Be accurate when preparing data for inserting. You can get unpredictable results in inconsistent data, for example, negative values for non-negative metrics such as session depth. ### Algorithm {#table_engine-collapsingmergetree-collapsing-algorithm} @@ -110,11 +110,11 @@ When ClickHouse merges data parts, each group of consecutive rows with the same For each resulting data part ClickHouse saves: 1. The first "cancel" and the last "state" rows, if the number of "state" and "cancel" rows matches and the last row is a "state" row. - 2. The last "state" row, if there is more "state" rows than "cancel" rows. - 3. The first "cancel" row, if there is more "cancel" rows than "state" rows. + 2. The last "state" row, if there are more "state" rows than "cancel" rows. + 3. The first "cancel" row, if there are more "cancel" rows than "state" rows. 4. None of the rows, in all other cases. - In addition when there is at least 2 more "state" rows than "cancel" rows, or at least 2 more "cancel" rows then "state" rows, the merge continues, but ClickHouse treats this situation as a logical error and records it in the server log. This error can occur if the same data were inserted more than once. + Also when there are at least 2 more "state" rows than "cancel" rows, or at least 2 more "cancel" rows then "state" rows, the merge continues, but ClickHouse treats this situation as a logical error and records it in the server log. This error can occur if the same data were inserted more than once. Thus, collapsing should not change the results of calculating statistics. Changes gradually collapsed so that in the end only the last state of almost every object left. @@ -123,7 +123,7 @@ The `Sign` is required because the merging algorithm doesn't guarantee that all To finalize collapsing, write a query with `GROUP BY` clause and aggregate functions that account for the sign. For example, to calculate quantity, use `sum(Sign)` instead of `count()`. To calculate the sum of something, use `sum(Sign * x)` instead of `sum(x)`, and so on, and also add `HAVING sum(Sign) > 0`. 
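To make the finalizing rule above concrete, a minimal sketch of such a query over the `UAct` example table from this section (assuming the `UserID`, `PageViews`, `Duration` and `Sign` columns shown above) could look like:

```sql
SELECT
    UserID,
    sum(PageViews * Sign) AS PageViews,
    sum(Duration * Sign) AS Duration
FROM UAct
GROUP BY UserID
HAVING sum(Sign) > 0
```

Objects whose "state" and "cancel" rows fully cancel out are filtered by the `HAVING sum(Sign) > 0` condition, so only objects with a surviving state remain in the result.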
-The aggregates `count`, `sum` and `avg` could be calculated this way. The aggregate `uniq` could be calculated if an object has at least one state not collapsed. The aggregates `min` and `max` could not be calculated because `CollapsingMergeTree` does not save values history of the collapsed states. +The aggregates `count`, `sum` and `avg` could be calculated this way. The aggregate `uniq` could be calculated if an object has at least one state not collapsed. The aggregates `min` and `max` could not be calculated because `CollapsingMergeTree` does not save the values history of the collapsed states. If you need to extract data without aggregation (for example, to check whether rows are present whose newest values match certain conditions), you can use the `FINAL` modifier for the `FROM` clause. This approach is significantly less efficient. @@ -182,7 +182,7 @@ SELECT * FROM UAct What do we see and where is collapsing? -With two `INSERT` queries, we created 2 data parts. The `SELECT` query was performed in 2 threads, and we got a random order of rows. Collapsing not occurred because there was no merge of the data parts yet. ClickHouse merges data part in an unknown moment of time which we can not predict. +With two `INSERT` queries, we created 2 data parts. The `SELECT` query was performed in 2 threads, and we got a random order of rows. Collapsing not occurred because there was no merge of the data parts yet. ClickHouse merges data part in an unknown moment which we can not predict. Thus we need aggregation: diff --git a/docs/en/operations/table_engines/custom_partitioning_key.md b/docs/en/operations/table_engines/custom_partitioning_key.md index d5e80e30f7e..83e00ddacbe 100644 --- a/docs/en/operations/table_engines/custom_partitioning_key.md +++ b/docs/en/operations/table_engines/custom_partitioning_key.md @@ -2,7 +2,7 @@ Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](materializedview.md) based on MergeTree tables support partitioning, as well. -A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately in order to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. +A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`: diff --git a/docs/en/operations/table_engines/dictionary.md b/docs/en/operations/table_engines/dictionary.md index c3de79f69cf..82311b6dee7 100644 --- a/docs/en/operations/table_engines/dictionary.md +++ b/docs/en/operations/table_engines/dictionary.md @@ -1,4 +1,3 @@ - # Dictionary The `Dictionary` engine displays the [dictionary](../../query_language/dicts/external_dicts.md) data as a ClickHouse table. 
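As an illustration of how such a table is declared (a sketch only; `products` is a hypothetical dictionary name, and the column list must match the structure of the external dictionary):

```sql
CREATE TABLE products
(
    product_id UInt64,
    title String
)
ENGINE = Dictionary(products)
```

Selecting from this table then returns the current contents of the dictionary as ordinary rows.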
diff --git a/docs/en/operations/table_engines/distributed.md b/docs/en/operations/table_engines/distributed.md index 39b02310575..3201ecc4785 100644 --- a/docs/en/operations/table_engines/distributed.md +++ b/docs/en/operations/table_engines/distributed.md @@ -1,7 +1,6 @@ - # Distributed -**The Distributed engine does not store data itself**, but allows distributed query processing on multiple servers. +**Tables with the Distributed engine do not store any data by themselves**, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. The Distributed engine accepts parameters: @@ -23,7 +22,7 @@ Distributed(logs, default, hits[, sharding_key[, policy_name]]) ``` Data will be read from all servers in the 'logs' cluster, from the default.hits table located on every server in the cluster. -Data is not only read, but is partially processed on the remote servers (to the extent that this is possible). +Data is not only read but is partially processed on the remote servers (to the extent that this is possible). For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated. Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase(). @@ -83,7 +82,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../settings/settings.md#settings-load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. -This works in favor of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. +This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard. @@ -99,9 +98,9 @@ The Distributed engine requires writing clusters to the config file. Clusters fr There are two methods for writing data to a cluster: -First, you can define which servers to write which data to and perform the write directly on each shard. 
In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently. -Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case. +Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case. Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19. @@ -115,9 +114,9 @@ To select the shard that a row of data is sent to, the sharding expression is an The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression 'rand()' for random distribution of data, or 'UserID' for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID). -A simple remainder from division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables. +A simple remainder of division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables. -SELECT queries are sent to all the shards, and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. +SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. 
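To tie the sharding-key discussion together, here is a sketch of a Distributed table that shards by a hashed user ID. The table name `hits_all` is hypothetical; the `logs` cluster and the `default.hits` table (assumed to have a `UserID` column) are the ones used in the examples above:

```sql
CREATE TABLE hits_all AS default.hits
ENGINE = Distributed(logs, default, hits, intHash64(UserID))
```

Inserts into `hits_all` are then routed to shards by `intHash64(UserID)` (weighted by the per-shard weights from the config), while SELECT queries fan out to all shards regardless of how the data was written.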
You should be concerned about the sharding scheme in the following cases: diff --git a/docs/en/operations/table_engines/external_data.md b/docs/en/operations/table_engines/external_data.md index 12fc34f516e..ae5485e3490 100644 --- a/docs/en/operations/table_engines/external_data.md +++ b/docs/en/operations/table_engines/external_data.md @@ -1,4 +1,3 @@ - # External Data for Query Processing ClickHouse allows sending a server the data that is needed for processing a query, together with a SELECT query. This data is put in a temporary table (see the section "Temporary tables") and can be used in the query (for example, in IN operators). diff --git a/docs/fa/operations/performance/sampling_query_profiler_example_result.txt b/docs/fa/operations/performance/sampling_query_profiler_example_result.txt new file mode 120000 index 00000000000..58c5abe7122 --- /dev/null +++ b/docs/fa/operations/performance/sampling_query_profiler_example_result.txt @@ -0,0 +1 @@ +../../../en/operations/performance/sampling_query_profiler_example_result.txt \ No newline at end of file diff --git a/docs/ja/operations/performance/sampling_query_profiler_example_result.txt b/docs/ja/operations/performance/sampling_query_profiler_example_result.txt new file mode 120000 index 00000000000..58c5abe7122 --- /dev/null +++ b/docs/ja/operations/performance/sampling_query_profiler_example_result.txt @@ -0,0 +1 @@ +../../../en/operations/performance/sampling_query_profiler_example_result.txt \ No newline at end of file diff --git a/docs/ru/operations/performance/sampling_query_profiler_example_result.txt b/docs/ru/operations/performance/sampling_query_profiler_example_result.txt new file mode 120000 index 00000000000..58c5abe7122 --- /dev/null +++ b/docs/ru/operations/performance/sampling_query_profiler_example_result.txt @@ -0,0 +1 @@ +../../../en/operations/performance/sampling_query_profiler_example_result.txt \ No newline at end of file diff --git a/docs/tools/build.py b/docs/tools/build.py index ac135e27690..2193f498234 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -207,10 +207,10 @@ def build_single_page_version(lang, args, cfg): ] }) mkdocs_build.build(cfg) - if not args.version_prefix: # maybe enable in future - test.test_single_page(os.path.join(test_dir, 'single', 'index.html'), lang) if args.save_raw_single_page: shutil.copytree(test_dir, args.save_raw_single_page) + if not args.version_prefix: # maybe enable in future + test.test_single_page(os.path.join(test_dir, 'single', 'index.html'), lang) def write_redirect_html(out_path, to_url): diff --git a/docs/tools/mkdocs-material-theme/partials/social.html b/docs/tools/mkdocs-material-theme/partials/social.html index 86fc636584c..89e6f0fec87 100644 --- a/docs/tools/mkdocs-material-theme/partials/social.html +++ b/docs/tools/mkdocs-material-theme/partials/social.html @@ -1,3 +1,3 @@ diff --git a/docs/tools/test.py b/docs/tools/test.py index f02d8fe0cfb..0efbd9bb6ce 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from __future__ import unicode_literals import logging @@ -33,10 +33,8 @@ def test_single_page(input_path, lang): if duplicate_anchor_points: logging.warning('Found %d duplicate anchor points' % duplicate_anchor_points) - if links_to_nowhere: - logging.error('Found %d links to nowhere' % links_to_nowhere) - sys.exit(10) + assert not links_to_nowhere, 'Found %d links to nowhere' % links_to_nowhere assert len(anchor_points) > 10, 'Html parsing is probably 
broken' diff --git a/docs/zh/operations/performance/sampling_query_profiler_example_result.txt b/docs/zh/operations/performance/sampling_query_profiler_example_result.txt new file mode 120000 index 00000000000..58c5abe7122 --- /dev/null +++ b/docs/zh/operations/performance/sampling_query_profiler_example_result.txt @@ -0,0 +1 @@ +../../../en/operations/performance/sampling_query_profiler_example_result.txt \ No newline at end of file diff --git a/website/templates/index/community.html b/website/templates/index/community.html index ae933331700..26e6466d228 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -150,7 +150,7 @@
    -

    Hosting ClickHouse Meetups

    +

    Hosting ClickHouse Meetups

    ClickHouse meetups are essential for strengthening the community worldwide, but they wouldn't be possible without the help of local organizers. Please fill in this form if you want to become one or want to meet the ClickHouse core team for any other reason.

    From 77b26f6a93146d5214de63d5fc22c4a55a41fda9 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Mar 2020 16:00:34 +0300 Subject: [PATCH 105/115] Update PartLog.cpp --- dbms/src/Interpreters/PartLog.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp index c91d581449b..c797ad8a8a9 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -93,7 +93,6 @@ void PartLogElement::appendToBlock(Block & block) const columns[i++]->insert(error); columns[i++]->insert(exception); - block.setColumns(std::move(columns)); } From f7c17435b820dfca38fb068f1f4bd7f43c8e9489 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 16:29:59 +0300 Subject: [PATCH 106/115] Fix build --- dbms/src/Processors/Executors/PipelineExecutor.cpp | 2 ++ dbms/src/Processors/RowsBeforeLimitCounter.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Processors/Executors/PipelineExecutor.cpp b/dbms/src/Processors/Executors/PipelineExecutor.cpp index ea7478e3f80..fc33cbbfb3a 100644 --- a/dbms/src/Processors/Executors/PipelineExecutor.cpp +++ b/dbms/src/Processors/Executors/PipelineExecutor.cpp @@ -488,6 +488,8 @@ void PipelineExecutor::execute(size_t num_threads) if (!all_processors_finished) throw Exception("Pipeline stuck. Current state:\n" + dumpPipeline(), ErrorCodes::LOGICAL_ERROR); + +std::cerr << dumpPipeline() << std::endl; } void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads) diff --git a/dbms/src/Processors/RowsBeforeLimitCounter.h b/dbms/src/Processors/RowsBeforeLimitCounter.h index 1408010cec7..25b8a641600 100644 --- a/dbms/src/Processors/RowsBeforeLimitCounter.h +++ b/dbms/src/Processors/RowsBeforeLimitCounter.h @@ -1,3 +1,4 @@ +#pragma once #include #include @@ -15,7 +16,7 @@ public: uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } - bool setAppliedLimit() { has_applied_limit.store(true, std::memory_order::release); } + void setAppliedLimit() { has_applied_limit.store(true, std::memory_order::release); } bool hasAppliedLimit() const { return has_applied_limit.load(std::memory_order_acquire); } private: From 100fe4c93cb4281d99c590084a52d61ffe4e2b97 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 16:45:19 +0300 Subject: [PATCH 107/115] Fix build. --- .../Processors/Executors/PipelineExecutor.cpp | 2 - .../TreeExecutorBlockInputStream.cpp | 66 +++++++++++-------- .../Executors/TreeExecutorBlockInputStream.h | 4 +- 3 files changed, 43 insertions(+), 29 deletions(-) diff --git a/dbms/src/Processors/Executors/PipelineExecutor.cpp b/dbms/src/Processors/Executors/PipelineExecutor.cpp index fc33cbbfb3a..ea7478e3f80 100644 --- a/dbms/src/Processors/Executors/PipelineExecutor.cpp +++ b/dbms/src/Processors/Executors/PipelineExecutor.cpp @@ -488,8 +488,6 @@ void PipelineExecutor::execute(size_t num_threads) if (!all_processors_finished) throw Exception("Pipeline stuck. 
Current state:\n" + dumpPipeline(), ErrorCodes::LOGICAL_ERROR); - -std::cerr << dumpPipeline() << std::endl; } void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads) diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index d08edeaea80..52a77a6eda5 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -120,6 +120,8 @@ void TreeExecutorBlockInputStream::init() connect(*totals_port, *input_totals_port); input_totals_port->setNeeded(); } + + initRowsBeforeLimit(); } void TreeExecutorBlockInputStream::execute(bool on_totals) @@ -206,42 +208,45 @@ void TreeExecutorBlockInputStream::execute(bool on_totals) } } -void TreeExecutorBlockInputStream::calcRowsBeforeLimit() +void TreeExecutorBlockInputStream::initRowsBeforeLimit() { - std::stack stack; - stack.push(root); + std::vector limits; + std::vector sources; - size_t rows_before_limit = 0; - bool has_limit = false; + struct StackEntry + { + IProcessor * processor; + bool visited_limit; + }; + + std::stack stack; + stack.push({root, false}); while (!stack.empty()) { - auto processor = stack.top(); + auto processor = stack.top().processor; + bool visited_limit = stack.top().visited_limit; stack.pop(); - if (auto * limit = typeid_cast(processor)) + if (!visited_limit) { - has_limit = true; - rows_before_limit += limit->getRowsBeforeLimitAtLeast(); - } - if (auto * source = typeid_cast(processor)) - { - if (auto & stream = source->getStream()) + if (auto * limit = typeid_cast(processor)) { - auto & profile_info = stream->getProfileInfo(); - if (profile_info.hasAppliedLimit()) - { - has_limit = true; - rows_before_limit += profile_info.getRowsBeforeLimit(); - } + visited_limit = true; + limits.emplace_back(limit); } + + if (auto * source = typeid_cast(processor)) + sources.emplace_back(source); } - if (auto * sorting = typeid_cast(processor)) + if (auto * sorting = typeid_cast(processor)) { - rows_before_limit += sorting->getNumReadRows(); - has_limit = true; + if (!rows_before_limit_at_least) + rows_before_limit_at_least = std::make_shared(); + + sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least); /// Don't go to children. Take rows_before_limit from last PartialSortingTransform. 
continue; @@ -250,12 +255,20 @@ void TreeExecutorBlockInputStream::calcRowsBeforeLimit() for (auto & child_port : processor->getInputs()) { auto * child_processor = &child_port.getOutputPort().getProcessor(); - stack.push(child_processor); + stack.push({child_processor, visited_limit}); } } - if (has_limit) - info.setRowsBeforeLimit(rows_before_limit); + if (!rows_before_limit_at_least && (!limits.empty() && !sources.empty())) + { + rows_before_limit_at_least = std::make_shared(); + + for (auto & limit : limits) + limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + } } Block TreeExecutorBlockInputStream::readImpl() @@ -271,7 +284,8 @@ Block TreeExecutorBlockInputStream::readImpl() totals = getHeader().cloneWithColumns(input_totals_port->pull().detachColumns()); } - calcRowsBeforeLimit(); + if (rows_before_limit_at_least && rows_before_limit_at_least->hasAppliedLimit()) + info.setRowsBeforeLimit(rows_before_limit_at_least->get()); return {}; } diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h index 8170d8fdb50..3ab8dde6948 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -55,6 +56,7 @@ private: IProcessor * root = nullptr; std::unique_ptr input_port; std::unique_ptr input_totals_port; + RowsBeforeLimitCounterPtr rows_before_limit_at_least; /// Remember sources that support progress. std::vector sources_with_progress; @@ -65,7 +67,7 @@ private: /// Execute tree step-by-step until root returns next chunk or execution is finished. void execute(bool on_totals); - void calcRowsBeforeLimit(); + void initRowsBeforeLimit(); /// Moved from pipe. std::vector> interpreter_context; From db9ad40db5c004ff988d5fae2f450184aafc9ecb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 16:53:25 +0300 Subject: [PATCH 108/115] Fix build. 
--- .../Processors/Executors/TreeExecutorBlockInputStream.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index 52a77a6eda5..f95425ecf43 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -210,7 +210,7 @@ void TreeExecutorBlockInputStream::execute(bool on_totals) void TreeExecutorBlockInputStream::initRowsBeforeLimit() { - std::vector limits; + std::vector limit_transforms; std::vector sources; struct StackEntry @@ -234,7 +234,7 @@ void TreeExecutorBlockInputStream::initRowsBeforeLimit() if (auto * limit = typeid_cast(processor)) { visited_limit = true; - limits.emplace_back(limit); + limit_transforms.emplace_back(limit); } if (auto * source = typeid_cast(processor)) @@ -259,11 +259,11 @@ void TreeExecutorBlockInputStream::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() && !sources.empty())) + if (!rows_before_limit_at_least && (!limit_transforms.empty() && !sources.empty())) { rows_before_limit_at_least = std::make_shared(); - for (auto & limit : limits) + for (auto & limit : limit_transforms) limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); for (auto & source : sources) From b4a93c092d612d3c89a6ab3c34fe3587e65373dd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 17:16:49 +0300 Subject: [PATCH 109/115] Fix build. --- dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp | 2 +- dbms/src/Processors/QueryPipeline.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index f95425ecf43..c07499b28dc 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -259,7 +259,7 @@ void TreeExecutorBlockInputStream::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limit_transforms.empty() && !sources.empty())) + if (!rows_before_limit_at_least && (!limit_transforms.empty() || !sources.empty())) { rows_before_limit_at_least = std::make_shared(); diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp index 6b2ee284c2a..60ea824ad5c 100644 --- a/dbms/src/Processors/QueryPipeline.cpp +++ b/dbms/src/Processors/QueryPipeline.cpp @@ -622,7 +622,7 @@ void QueryPipeline::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() && !sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty())) { rows_before_limit_at_least = std::make_shared(); From b6d7cb45dd283569a221072d77e7eccb385cce17 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 17:30:43 +0300 Subject: [PATCH 110/115] Added comment. --- dbms/src/Processors/RowsBeforeLimitCounter.h | 1 + dbms/src/Processors/Transforms/PartialSortingTransform.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/RowsBeforeLimitCounter.h b/dbms/src/Processors/RowsBeforeLimitCounter.h index 25b8a641600..abee5a09405 100644 --- a/dbms/src/Processors/RowsBeforeLimitCounter.h +++ b/dbms/src/Processors/RowsBeforeLimitCounter.h @@ -5,6 +5,7 @@ namespace DB { +/// This class helps to calculate rows_before_limit_at_least. 
class RowsBeforeLimitCounter { public: diff --git a/dbms/src/Processors/Transforms/PartialSortingTransform.h b/dbms/src/Processors/Transforms/PartialSortingTransform.h index 158b0af202e..47ac90c6904 100644 --- a/dbms/src/Processors/Transforms/PartialSortingTransform.h +++ b/dbms/src/Processors/Transforms/PartialSortingTransform.h @@ -13,7 +13,6 @@ class PartialSortingTransform : public ISimpleTransform { public: /// limit - if not 0, then you can sort each block not completely, but only `limit` first rows by order. - /// When count_rows is false, read_rows is not changed. It is needed PartialSortingTransform( const Block & header_, SortDescription & description_, From 4dda8e11eca05154e9dfe74fc81e6775c0c88291 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 19 Mar 2020 18:32:53 +0300 Subject: [PATCH 111/115] emironyuk-DOCSUP-782 (#90) (#9725) * docs(string functions): synchronization of ru-en versions * docs(string_functions): links fixed * docs(string functions): mid(s, offset, length), substr(s, offset, length) added to substring(s, offset,length) * docs(string function): fix count form for bytes Co-authored-by: emironyuk Co-authored-by: emironyuk <62014692+emironyuk@users.noreply.github.com> Co-authored-by: emironyuk Co-authored-by: Sergei Shtykov --- docs/en/data_types/fixedstring.md | 2 +- docs/en/operations/table_engines/mergetree.md | 4 +- .../functions/string_functions.md | 75 +++++++---- docs/ru/data_types/fixedstring.md | 2 +- docs/ru/operations/table_engines/mergetree.md | 4 +- .../functions/string_functions.md | 123 +++++++++++------- 6 files changed, 128 insertions(+), 82 deletions(-) diff --git a/docs/en/data_types/fixedstring.md b/docs/en/data_types/fixedstring.md index 088315a4d6f..705dc47cba1 100644 --- a/docs/en/data_types/fixedstring.md +++ b/docs/en/data_types/fixedstring.md @@ -50,6 +50,6 @@ WHERE a = 'b\0' This behaviour differs from MySQL for the `CHAR` type (where strings are padded with spaces, and the spaces are removed for output). -Note that the length of the `FixedString(N)` value is constant. The [length](../query_language/functions/array_functions.md#array_functions-length) function returns `N` even if the `FixedString(N)` value is filled only with null bytes, but the [empty](../query_language/functions/string_functions.md#string_functions-empty) function returns `1` in this case. +Note that the length of the `FixedString(N)` value is constant. The [length](../query_language/functions/array_functions.md#array_functions-length) function returns `N` even if the `FixedString(N)` value is filled only with null bytes, but the [empty](../query_language/functions/string_functions.md#empty) function returns `1` in this case. 
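A quick illustrative query for this distinction between `length` and `empty` on `FixedString` (a sketch; the behaviour follows from the rule just stated):

```sql
SELECT
    length(toFixedString('', 3)) AS len,
    empty(toFixedString('', 3)) AS is_empty
```

Here `len` is `3` (the fixed size) while `is_empty` is `1`, because the value consists only of null bytes.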
[Original article](https://clickhouse.tech/docs/en/data_types/fixedstring/) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index e80de9def1c..511d508f513 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -331,8 +331,8 @@ Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | b [notEquals(!=, <>)](../../query_language/functions/comparison_functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ [like](../../query_language/functions/string_search_functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ [notLike](../../query_language/functions/string_search_functions.md#function-notlike) | ✔ | ✔ | ✔ | ✗ | ✗ -[startsWith](../../query_language/functions/string_functions.md#function-startswith) | ✔ | ✔ | ✔ | ✔ | ✗ -[endsWith](../../query_language/functions/string_functions.md#function-endswith) | ✗ | ✗ | ✔ | ✔ | ✗ +[startsWith](../../query_language/functions/string_functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ +[endsWith](../../query_language/functions/string_functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ [multiSearchAny](../../query_language/functions/string_search_functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ [in](../../query_language/functions/in_functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ [notIn](../../query_language/functions/in_functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 3d8beae6800..55b32d33f61 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -1,66 +1,66 @@ # Functions for working with strings -## empty {#string_functions-empty} +## empty {#empty} Returns 1 for an empty string or 0 for a non-empty string. The result type is UInt8. A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte. The function also works for arrays. -## notEmpty +## notEmpty {#notempty} Returns 0 for an empty string or 1 for a non-empty string. The result type is UInt8. The function also works for arrays. -## length +## length {#length} Returns the length of a string in bytes (not in characters, and not in code points). The result type is UInt64. The function also works for arrays. -## lengthUTF8 +## lengthUTF8 {#lengthutf8} Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). The result type is UInt64. -## char_length, CHAR_LENGTH +## char_length, CHAR_LENGTH {#char_length} Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). The result type is UInt64. -## character_length, CHARACTER_LENGTH +## character_length, CHARACTER_LENGTH {#character_length} Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). The result type is UInt64. -## lower, lcase +## lower, lcase {#lower} Converts ASCII Latin symbols in a string to lowercase. 
-## upper, ucase +## upper, ucase {#upper} Converts ASCII Latin symbols in a string to uppercase. -## lowerUTF8 +## lowerUTF8 {#lowerutf8} Converts a string to lowercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text. It doesn't detect the language. So for Turkish the result might not be exactly correct. If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. If the string contains a set of bytes that is not UTF-8, then the behavior is undefined. -## upperUTF8 +## upperUTF8 {#upperutf8} Converts a string to uppercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text. It doesn't detect the language. So for Turkish the result might not be exactly correct. If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. If the string contains a set of bytes that is not UTF-8, then the behavior is undefined. -## isValidUTF8 +## isValidUTF8 {#isvalidutf8} Returns 1, if the set of bytes is valid UTF-8 encoded, otherwise 0. -## toValidUTF8 +## toValidUTF8 {#tovalidutf8} Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running in a row invalid characters are collapsed into the one replacement character. @@ -74,7 +74,7 @@ Parameters: Returned value: Valid UTF-8 string. -### Example +**Example** ```sql SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') @@ -122,15 +122,15 @@ Result: └────────────────────────────────┘ ``` -## reverse +## reverse {#reverse} Reverses the string (as a sequence of bytes). -## reverseUTF8 +## reverseUTF8 {#reverseutf8} Reverses a sequence of Unicode code points, assuming that the string contains a set of bytes representing a UTF-8 text. Otherwise, it does something else (it doesn't throw an exception). -## format(pattern, s0, s1, ...) +## format(pattern, s0, s1, ...) {#format} Formatting constant pattern with the string listed in the arguments. `pattern` is a simplified Python format pattern. Format string contains "replacement fields" surrounded by curly braces `{}`. Anything that is not contained in braces is considered literal text, which is copied unchanged to the output. If you need to include a brace character in the literal text, it can be escaped by doubling: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are treated as consequence numbers). @@ -193,6 +193,22 @@ Same as [concat](#concat), the difference is that you need to ensure that `conca The function is named "injective" if it always returns different result for different values of arguments. In other words: different arguments never yield identical result. +**Syntax** + +```sql +concatAssumeInjective(s1, s2, ...) +``` + +**Parameters** + +Values of type String or FixedString. + +**Returned values** + +Returns the String that results from concatenating the arguments. + +If any of argument values is `NULL`, `concatAssumeInjective` returns `NULL`. + **Example** Input table: @@ -228,36 +244,39 @@ Result: └────────────────────┴────────────┘ ``` -## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) +## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring} Returns a substring starting with the byte from the 'offset' index that is 'length' bytes long. Character indexing starts from one (as in standard SQL). The 'offset' and 'length' arguments must be constants. 
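For example (an illustrative query; byte offsets start at 1 as stated above):

```sql
SELECT substring('ClickHouse', 6, 5) AS s
```

This returns `House`, the five bytes starting at byte 6.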
-## substringUTF8(s, offset, length) +## substringUTF8(s, offset, length) {#substringutf8} The same as 'substring', but for Unicode code points. Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). -## appendTrailingCharIfAbsent(s, c) +## appendTrailingCharIfAbsent(s, c) {#appendtrailingcharifabsent} If the 's' string is non-empty and does not contain the 'c' character at the end, it appends the 'c' character to the end. -## convertCharset(s, from, to) +## convertCharset(s, from, to) {#convertcharset} Returns the string 's' that was converted from the encoding in 'from' to the encoding in 'to'. -## base64Encode(s) +## base64Encode(s) {#base64encode} + Encodes 's' string into base64 -## base64Decode(s) +## base64Decode(s) {#base64decode} + Decode base64-encoded string 's' into original string. In case of failure raises an exception. -## tryBase64Decode(s) +## tryBase64Decode(s) {#trybase64decode} + Similar to base64Decode, but in case of error an empty string would be returned. -## endsWith(s, suffix) {#function-endswith} +## endsWith(s, suffix) {#endswith} Returns whether to end with the specified suffix. Returns 1 if the string ends with the specified suffix, otherwise it returns 0. -## startsWith(str, prefix) {#function-startswith} +## startsWith(str, prefix) {#startswith} Returns 1 whether string starts with the specified prefix, otherwise it returns 0. @@ -438,19 +457,19 @@ Result: └─────────────────────────────────────┘ ``` -## CRC32(s) +## CRC32(s) {#crc32} Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). The result type is UInt32. -## CRC32IEEE(s) +## CRC32IEEE(s) {#crc32ieee} Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial. The result type is UInt32. -## CRC64(s) +## CRC64(s) {#crc64} Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial. diff --git a/docs/ru/data_types/fixedstring.md b/docs/ru/data_types/fixedstring.md index 07e45f71cab..66ece301cca 100644 --- a/docs/ru/data_types/fixedstring.md +++ b/docs/ru/data_types/fixedstring.md @@ -51,6 +51,6 @@ WHERE a = 'b\0' Это поведение отличается от поведения MySQL для типа `CHAR`, где строки дополняются пробелами, а пробелы перед выводом вырезаются. -Обратите внимание, что длина значения `FixedString(N)` постоянна. Функция [length](../query_language/functions/array_functions.md#array_functions-length) возвращает `N` даже если значение `FixedString(N)` заполнено только нулевыми байтами, однако функция [empty](../query_language/functions/string_functions.md#string_functions-empty) в этом же случае возвращает `1`. +Обратите внимание, что длина значения `FixedString(N)` постоянна. Функция [length](../query_language/functions/array_functions.md#array_functions-length) возвращает `N` даже если значение `FixedString(N)` заполнено только нулевыми байтами, однако функция [empty](../query_language/functions/string_functions.md#empty) в этом же случае возвращает `1`. 
[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/fixedstring/) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 2753156f8a8..a3f617db0b8 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -324,8 +324,8 @@ Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | b [notEquals(!=, <>)](../../query_language/functions/comparison_functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ [like](../../query_language/functions/string_search_functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ [notLike](../../query_language/functions/string_search_functions.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ -[startsWith](../../query_language/functions/string_functions.md#function-startswith) | ✔ | ✔ | ✔ | ✔ | ✗ -[endsWith](../../query_language/functions/string_functions.md#function-endswith) | ✗ | ✗ | ✔ | ✔ | ✗ +[startsWith](../../query_language/functions/string_functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ +[endsWith](../../query_language/functions/string_functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ [multiSearchAny](../../query_language/functions/string_search_functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✔ | ✗ [in](../../query_language/functions/in_functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ [notIn](../../query_language/functions/in_functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md index 106672c9fdc..cf9451112f7 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/query_language/functions/string_functions.md @@ -1,47 +1,66 @@ # Функции для работы со строками -## empty {#string_functions-empty} +## empty {#empty} + Возвращает 1 для пустой строки, и 0 для непустой строки. -Тип результата - UInt8. +Тип результата — UInt8. Строка считается непустой, если содержит хотя бы один байт, пусть даже это пробел или нулевой байт. Функция также работает для массивов. -## notEmpty +## notEmpty {#notempty} + Возвращает 0 для пустой строки, и 1 для непустой строки. -Тип результата - UInt8. +Тип результата — UInt8. Функция также работает для массивов. -## length +## length {#length} + Возвращает длину строки в байтах (не символах, не кодовых точках). -Тип результата - UInt64. +Тип результата — UInt64. Функция также работает для массивов. -## lengthUTF8 -Возвращает длину строки в кодовых точках Unicode (не символах), при допущении, что строка содержит набор байт, являющийся текстом в кодировке UTF-8. Если допущение не выполнено - то возвращает какой-нибудь результат (не кидает исключение). -Тип результата - UInt64. +## lengthUTF8 {#lengthutf8} + +Возвращает длину строки в кодовых точках Unicode (не символах), при допущении, что строка содержит набор байтов, являющийся текстом в кодировке UTF-8. Если допущение не выполнено, то возвращает какой-нибудь результат (не кидает исключение). +Тип результата — UInt64. + +## char_length, CHAR_LENGTH {#char_length} + +Возвращает длину строки в кодовых точках Unicode (не символах), при допущении, что строка содержит набор байтов, являющийся текстом в кодировке UTF-8. Если допущение не выполнено, возвращает какой-нибудь результат (не кидает исключение). +Тип результата — UInt64. + +## character_length, CHARACTER_LENGTH {#character_length} + +Возвращает длину строки в кодовых точках Unicode (не символах), при допущении, что строка содержит набор байтов, являющийся текстом в кодировке UTF-8. 
Если допущение не выполнено, возвращает какой-нибудь результат (не кидает исключение). +Тип результата — UInt64. + +## lower, lcase {#lower} -## lower Переводит ASCII-символы латиницы в строке в нижний регистр. -## upper +## upper, ucase {#upper} + Переводит ASCII-символы латиницы в строке в верхний регистр. -## lowerUTF8 -Переводит строку в нижний регистр, при допущении, что строка содержит набор байт, представляющий текст в кодировке UTF-8. +## lowerUTF8 {#lowerutf8} + +Переводит строку в нижний регистр, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. -Если длина UTF-8 последовательности байт различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки, результат работы может быть некорректным. -Если строка содержит набор байт, не являющийся UTF-8, то поведение не определено. +Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. -## upperUTF8 -Переводит строку в верхний регистр, при допущении, что строка содержит набор байт, представляющий текст в кодировке UTF-8. +## upperUTF8 {#upperutf8} + +Переводит строку в верхний регистр, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. -Если длина UTF-8 последовательности байт различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки, результат работы может быть некорректным. -Если строка содержит набор байт, не являющийся UTF-8, то поведение не определено. +Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки, результат работы может быть некорректным. +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. -## isValidUTF8 -Возвращает 1, если набор байт является корректным в кодировке UTF-8, 0 иначе. +## isValidUTF8 {#isvalidutf8} -## toValidUTF8 +Возвращает 1, если набор байтов является корректным в кодировке UTF-8, 0 иначе. + +## toValidUTF8 {#tovalidutf8} Заменяет некорректные символы UTF-8 на символ `�` (U+FFFD). Все идущие подряд некорректные символы схлопываются в один заменяющий символ. @@ -55,7 +74,7 @@ toValidUTF8( input_string ) Возвращаемое значение: Корректная строка UTF-8. -### Пример +**Пример** ```sql SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') @@ -103,16 +122,17 @@ SELECT repeat('abc', 10) └────────────────────────────────┘ ``` -## reverse -Разворачивает строку (как последовательность байт). +## reverse {#reverse} -## reverseUTF8 -Разворачивает последовательность кодовых точек Unicode, при допущении, что строка содержит набор байт, представляющий текст в кодировке UTF-8. Иначе - что-то делает (не кидает исключение). +Разворачивает строку (как последовательность байтов). +## reverseUTF8 {#reverseutf8} -## format(pattern, s0, s1, ...) +Разворачивает последовательность кодовых точек Unicode, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Иначе — что-то делает (не кидает исключение). -Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` -- упрощенная версия шаблона в языке Python. Шаблон содержит "заменяющие поля", которые окружены фигурными скобками `{}`. 
Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). +## format(pattern, s0, s1, ...) {#format} + +Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` — упрощенная версия шаблона в языке Python. Шаблон содержит "заменяющие поля", которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). ```sql SELECT format('{1} {0} {1}', 'World', 'Hello') @@ -131,9 +151,9 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat +## concat {#concat} -Склеивает строки переданные в аргументы в одну строку без разделителей. +Склеивает строки, переданные в аргументы, в одну строку без разделителей. **Cинтаксис** @@ -224,32 +244,39 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2) └────────────────────┴────────────┘ ``` -## substring(s, offset, length) -Возвращает подстроку, начиная с байта по индексу offset, длины length байт. Индексация символов - начиная с единицы (как в стандартном SQL). Аргументы offset и length должны быть константами. +## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring} -## substringUTF8(s, offset, length) -Так же, как substring, но для кодовых точек Unicode. Работает при допущении, что строка содержит набор байт, представляющий текст в кодировке UTF-8. Если допущение не выполнено - то возвращает какой-нибудь результат (не кидает исключение). +Возвращает подстроку, начиная с байта по индексу offset, длины length байт. Индексация символов — начиная с единицы (как в стандартном SQL). Аргументы offset и length должны быть константами. + +## substringUTF8(s, offset, length) {#substringutf8} + +Так же, как substring, но для кодовых точек Unicode. Работает при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Если допущение не выполнено, то возвращает какой-нибудь результат (не кидает исключение). + +## appendTrailingCharIfAbsent(s, c) {#appendtrailingcharifabsent} -## appendTrailingCharIfAbsent(s, c) Если строка s непустая и не содержит символ c на конце, то добавляет символ c в конец. -## convertCharset(s, from, to) +## convertCharset(s, from, to) {#convertcharset} + Возвращает сконвертированную из кодировки from в кодировку to строку s. -## base64Encode(s) +## base64Encode(s) {#base64encode} + Производит кодирование строки s в base64-представление. -## base64Decode(s) +## base64Decode(s) {#base64decode} + Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение -## tryBase64Decode(s) +## tryBase64Decode(s) {#trybase64decode} + Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. -## endsWith(s, suffix) {#function-endswith} +## endsWith(s, suffix) {#endswith} Возвращает 1, если строка завершается указанным суффиксом, и 0 в противном случае. 
-## startsWith(str, prefix) {#function-startswith} +## startsWith(str, prefix) {#startswith} Возвращает 1, если строка начинается указанным префиксом, в противном случае 0. @@ -430,22 +457,22 @@ SELECT trimBoth(' Hello, world! ') └─────────────────────────────────────┘ ``` -## CRC32(s) +## CRC32(s) {#crc32} Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен и начальным значением `0xffffffff` (т.к. используется реализация из zlib). -Тип результата - UInt32. +Тип результата — UInt32. -## CRC32IEEE(s) +## CRC32IEEE(s) {#crc32ieee} Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен. -Тип результата - UInt32. +Тип результата — UInt32. -## CRC64(s) +## CRC64(s) {#crc64} Возвращает чексумму CRC64 данной строки, используется CRC-64-ECMA многочлен. -Тип результата - UInt64. +Тип результата — UInt64. [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) From f2dca656f9307f90c17384d75dec440964f2d955 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Thu, 19 Mar 2020 19:37:55 +0300 Subject: [PATCH 112/115] MergeTree full support for S3 (#9646) * IMergeDataPart full S3 support. * MergeTreeData full S3 support. * Compilation fixes. * Mutations and merges S3 support. * Fixed removing data part. * MergeTree for S3 integration tests and fixes. * Code style issues. * Enable AWS logging. * Fixed hardlink creation for DiskLocal. * Fixed localBackup.cpp compilation. * Fixed attaching partition. * Get rid of extra methods in IDisk. * Fixed storage config reloading. * More tests with table manipulations. * Remove unused error codes. * Move localBackup to MergeTree folder. * Minor fixes. --- dbms/src/Disks/DiskLocal.cpp | 29 +- dbms/src/Disks/DiskLocal.h | 8 + dbms/src/Disks/DiskMemory.cpp | 17 ++ dbms/src/Disks/DiskMemory.h | 8 +- dbms/src/Disks/DiskS3.cpp | 34 ++- dbms/src/Disks/DiskS3.h | 8 +- dbms/src/Disks/IDisk.cpp | 37 +++ dbms/src/Disks/IDisk.h | 19 ++ dbms/src/Interpreters/Context.cpp | 13 +- .../Storages/MergeTree/IMergeTreeDataPart.cpp | 63 ++-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 283 ++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.h | 16 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 52 ++-- .../Storages/MergeTree/MergeTreeSettings.h | 1 - dbms/src/Storages/MergeTree/checkDataPart.cpp | 1 - .../MergeTree}/localBackup.cpp | 37 +-- .../MergeTree}/localBackup.h | 6 +- dbms/src/Storages/StorageMergeTree.cpp | 5 +- .../config.d/bg_processing_pool_conf.xml | 5 + .../configs/config.d/storage_conf.xml | 28 ++ .../configs/config.d/users.xml | 6 + .../test_merge_tree_s3/configs/config.xml | 20 -- .../test_merge_tree_s3/configs/users.xml | 23 -- .../integration/test_merge_tree_s3/test.py | 211 +++++++++++-- .../convert-month-partitioned-parts/main.cpp | 12 +- 25 files changed, 599 insertions(+), 343 deletions(-) rename dbms/src/{Common => Storages/MergeTree}/localBackup.cpp (51%) rename dbms/src/{Common => Storages/MergeTree}/localBackup.h (80%) create mode 100644 dbms/tests/integration/test_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml create mode 100644 dbms/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml create mode 100644 dbms/tests/integration/test_merge_tree_s3/configs/config.d/users.xml delete mode 100644 dbms/tests/integration/test_merge_tree_s3/configs/users.xml diff --git a/dbms/src/Disks/DiskLocal.cpp b/dbms/src/Disks/DiskLocal.cpp index 418ce966955..a1c2641e2f3 100644 --- a/dbms/src/Disks/DiskLocal.cpp +++ 
b/dbms/src/Disks/DiskLocal.cpp @@ -1,4 +1,5 @@ #include "DiskLocal.h" +#include #include "DiskFactory.h" #include @@ -11,7 +12,6 @@ namespace DB { - namespace ErrorCodes { extern const int UNKNOWN_ELEMENT_IN_CONFIG; @@ -254,6 +254,33 @@ Poco::Timestamp DiskLocal::getLastModified(const String & path) return Poco::File(disk_path + path).getLastModified(); } +void DiskLocal::createHardLink(const String & src_path, const String & dst_path) +{ + DB::createHardLink(disk_path + src_path, disk_path + dst_path); +} + +void DiskLocal::createFile(const String & path) +{ + Poco::File(disk_path + path).createFile(); +} + +void DiskLocal::setReadOnly(const String & path) +{ + Poco::File(disk_path + path).setReadOnly(true); +} + +bool inline isSameDiskType(const IDisk & one, const IDisk & another) +{ + return typeid(one) == typeid(another); +} + +void DiskLocal::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +{ + if (isSameDiskType(*this, *to_disk)) + Poco::File(disk_path + from_path).copyTo(to_disk->getPath() + to_path); /// Use more optimal way. + else + IDisk::copy(from_path, to_disk, to_path); /// Copy files through buffers. +} void DiskLocalReservation::update(UInt64 new_size) { diff --git a/dbms/src/Disks/DiskLocal.h b/dbms/src/Disks/DiskLocal.h index 77c86fa1f3e..61a3994b655 100644 --- a/dbms/src/Disks/DiskLocal.h +++ b/dbms/src/Disks/DiskLocal.h @@ -61,12 +61,16 @@ public: DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + void createFile(const String & path) override; + void moveFile(const String & from_path, const String & to_path) override; void replaceFile(const String & from_path, const String & to_path) override; void copyFile(const String & from_path, const String & to_path) override; + void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; + void listFiles(const String & path, std::vector & file_names) override; std::unique_ptr readFile( @@ -91,6 +95,10 @@ public: Poco::Timestamp getLastModified(const String & path) override; + void setReadOnly(const String & path) override; + + void createHardLink(const String & src_path, const String & dst_path) override; + private: bool tryReserve(UInt64 bytes); diff --git a/dbms/src/Disks/DiskMemory.cpp b/dbms/src/Disks/DiskMemory.cpp index 15b2b2152b1..6ae2af63485 100644 --- a/dbms/src/Disks/DiskMemory.cpp +++ b/dbms/src/Disks/DiskMemory.cpp @@ -386,10 +386,27 @@ void DiskMemory::removeRecursive(const String & path) void DiskMemory::listFiles(const String & path, std::vector & file_names) { + std::lock_guard lock(mutex); + for (auto it = iterateDirectory(path); it->isValid(); it->next()) file_names.push_back(it->name()); } +void DiskMemory::createHardLink(const String &, const String &) +{ + throw Exception("Method createHardLink is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + +void DiskMemory::createFile(const String &) +{ + throw Exception("Method createFile is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + +void DiskMemory::setReadOnly(const String &) +{ + throw Exception("Method setReadOnly is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED); +} + using DiskMemoryPtr = std::shared_ptr; diff --git a/dbms/src/Disks/DiskMemory.h b/dbms/src/Disks/DiskMemory.h index 8ddb5307c41..b0c1d30c61d 100644 --- a/dbms/src/Disks/DiskMemory.h +++ b/dbms/src/Disks/DiskMemory.h @@ -54,6 +54,8 @@ public: DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + void 
createFile(const String & path) override; + void moveFile(const String & from_path, const String & to_path) override; void replaceFile(const String & from_path, const String & to_path) override; @@ -80,10 +82,14 @@ public: void removeRecursive(const String & path) override; - void setLastModified(const String &, const Poco::Timestamp &) override { } + void setLastModified(const String &, const Poco::Timestamp &) override {} Poco::Timestamp getLastModified(const String &) override { return Poco::Timestamp(); } + void setReadOnly(const String & path) override; + + void createHardLink(const String & src_path, const String & dst_path) override; + private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); diff --git a/dbms/src/Disks/DiskS3.cpp b/dbms/src/Disks/DiskS3.cpp index d3712631a58..8bd5c0f074d 100644 --- a/dbms/src/Disks/DiskS3.cpp +++ b/dbms/src/Disks/DiskS3.cpp @@ -303,7 +303,12 @@ namespace finalized = true; } - void sync() override { metadata.save(true); } + void sync() override + { + if (finalized) + metadata.save(true); + } + std::string getFileName() const override { return metadata.metadata_file_path; } private: @@ -480,14 +485,12 @@ void DiskS3::copyFile(const String & from_path, const String & to_path) Metadata from(metadata_path + from_path); Metadata to(metadata_path + to_path, true); - for (UInt32 i = 0; i < from.s3_objects_count; ++i) + for (const auto & [path, size] : from.s3_objects) { - auto path = from.s3_objects[i].first; - auto size = from.s3_objects[i].second; auto new_path = s3_root_path + getRandomName(); Aws::S3::Model::CopyObjectRequest req; + req.SetCopySource(bucket + "/" + path); req.SetBucket(bucket); - req.SetCopySource(path); req.SetKey(new_path); throwIfError(client->CopyObject(req)); @@ -621,6 +624,27 @@ Poco::Timestamp DiskS3::getLastModified(const String & path) return Poco::File(metadata_path + path).getLastModified(); } +void DiskS3::createHardLink(const String & src_path, const String & dst_path) +{ + /** + * TODO: Replace with optimal implementation: + * Store links into a list in metadata file. + * Hardlink creation is adding new link to list and just metadata file copy. + */ + copyFile(src_path, dst_path); +} + +void DiskS3::createFile(const String & path) +{ + /// Create empty metadata file. 
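+    /// (On DiskS3 a "file" is represented by a small local metadata file that lists the
+    /// S3 objects backing its data, so an empty file is simply metadata with no objects yet.)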
+ Metadata metadata(metadata_path + path, true); + metadata.save(); +} + +void DiskS3::setReadOnly(const String & path) +{ + Poco::File(metadata_path + path).setReadOnly(true); +} DiskS3Reservation::~DiskS3Reservation() { diff --git a/dbms/src/Disks/DiskS3.h b/dbms/src/Disks/DiskS3.h index 10c7f015f77..1b61ed1cde3 100644 --- a/dbms/src/Disks/DiskS3.h +++ b/dbms/src/Disks/DiskS3.h @@ -31,7 +31,7 @@ public: const String & getName() const override { return name; } - const String & getPath() const override { return s3_root_path; } + const String & getPath() const override { return metadata_path; } ReservationPtr reserve(UInt64 bytes) override; @@ -87,10 +87,16 @@ public: void removeRecursive(const String & path) override; + void createHardLink(const String & src_path, const String & dst_path) override; + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; Poco::Timestamp getLastModified(const String & path) override; + void createFile(const String & path) override; + + void setReadOnly(const String & path) override; + private: bool tryReserve(UInt64 bytes); diff --git a/dbms/src/Disks/IDisk.cpp b/dbms/src/Disks/IDisk.cpp index 48b080e1704..36ab2a49573 100644 --- a/dbms/src/Disks/IDisk.cpp +++ b/dbms/src/Disks/IDisk.cpp @@ -1,4 +1,9 @@ #include "IDisk.h" +#include +#include +#include +#include +#include namespace DB { @@ -7,4 +12,36 @@ bool IDisk::isDirectoryEmpty(const String & path) { return !iterateDirectory(path)->isValid(); } + +void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & to_path) +{ + LOG_DEBUG( + &Poco::Logger::get("IDisk"), + "Copying from " << from_disk.getName() << " " << from_path << " to " << to_disk.getName() << " " << to_path); + + auto in = from_disk.readFile(from_path); + auto out = to_disk.writeFile(to_path); + copyData(*in, *out); +} + +void IDisk::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +{ + if (isFile(from_path)) + { + DB::copyFile(*this, from_path, *to_disk, to_path + fileName(from_path)); + } + else + { + Poco::Path path(from_path); + const String & dir_name = path.directory(path.depth() - 1); + const String dest = to_path + dir_name + "/"; + to_disk->createDirectories(dest); + + for (auto it = iterateDirectory(from_path); it->isValid(); it->next()) + { + copy(it->path(), to_disk, dest); + } + } +} + } diff --git a/dbms/src/Disks/IDisk.h b/dbms/src/Disks/IDisk.h index 877c6f84706..7d0b429720e 100644 --- a/dbms/src/Disks/IDisk.h +++ b/dbms/src/Disks/IDisk.h @@ -111,6 +111,9 @@ public: /// Return `true` if the specified directory is empty. bool isDirectoryEmpty(const String & path); + /// Create empty file at `path`. + virtual void createFile(const String & path) = 0; + /// Move the file from `from_path` to `to_path`. /// If a file with `to_path` path already exists, an exception will be thrown . virtual void moveFile(const String & from_path, const String & to_path) = 0; @@ -122,6 +125,9 @@ public: /// Copy the file from `from_path` to `to_path`. virtual void copyFile(const String & from_path, const String & to_path) = 0; + /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`. + virtual void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path); + /// List files at `path` and add their names to `file_names` virtual void listFiles(const String & path, std::vector & file_names) = 0; @@ -147,11 +153,24 @@ public: /// Remove file or directory with all children. Use with extra caution. 
Throws exception if file doesn't exists. virtual void removeRecursive(const String & path) = 0; + /// Remove file or directory if it exists. + void removeIfExists(const String & path) + { + if (exists(path)) + remove(path); + } + /// Set last modified time to file or directory at `path`. virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0; /// Get last modified time of file or directory at `path`. virtual Poco::Timestamp getLastModified(const String & path) = 0; + + /// Set file at `path` as read-only. + virtual void setReadOnly(const String & path) = 0; + + /// Create hardlink from `src_path` to `dst_path`. + virtual void createHardLink(const String & src_path, const String & dst_path) = 0; }; using DiskPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 74340027dec..ab9b4a2c31b 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1321,7 +1321,18 @@ BackgroundProcessingPool & Context::getBackgroundPool() { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool.emplace(settings.background_pool_size); + { + BackgroundProcessingPool::PoolSettings pool_settings; + auto & config = getConfigRef(); + pool_settings.thread_sleep_seconds = config.getDouble("background_processing_pool_thread_sleep_seconds", 10); + pool_settings.thread_sleep_seconds_random_part = config.getDouble("background_processing_pool_thread_sleep_seconds_random_part", 1.0); + pool_settings.thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1); + pool_settings.task_sleep_seconds_when_no_work_min = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10); + pool_settings.task_sleep_seconds_when_no_work_max = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600); + pool_settings.task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1); + pool_settings.task_sleep_seconds_when_no_work_random_part = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0); + shared->background_pool.emplace(settings.background_pool_size, pool_settings); + } return *shared->background_pool; } diff --git a/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp index b5c8f16b7e5..dedda5b5159 100644 --- a/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -3,16 +3,13 @@ #include #include #include -#include #include #include #include #include -#include -#include +#include #include #include -#include #include #include @@ -30,7 +27,6 @@ namespace ErrorCodes extern const int NOT_FOUND_EXPECTED_DATA_PART; extern const int BAD_SIZE_OF_FILE_IN_DATA_PART; extern const int BAD_TTL_FILE; - extern const int CANNOT_UNLINK; extern const int NOT_IMPLEMENTED; } @@ -251,7 +247,7 @@ void IMergeTreeDataPart::removeIfNeeded() if (is_temp) { - String file_name = Poco::Path(relative_path).getFileName(); + String file_name = fileName(relative_path); if (file_name.empty()) throw Exception("relative_path " + relative_path + " of part " + name + " is invalid or not set", ErrorCodes::LOGICAL_ERROR); @@ -699,33 +695,33 @@ void IMergeTreeDataPart::remove() const * And a race condition can happen that will lead to "File not found" error here. 
*/ - String from_ = storage.relative_data_path + relative_path; - String to_ = storage.relative_data_path + "delete_tmp_" + name; + String from = storage.relative_data_path + relative_path; + String to = storage.relative_data_path + "delete_tmp_" + name; // TODO directory delete_tmp_ is never removed if server crashes before returning from this function - if (disk->exists(to_)) + if (disk->exists(to)) { - LOG_WARNING(storage.log, "Directory " << fullPath(disk, to_) << " (to which part must be renamed before removing) already exists." + LOG_WARNING(storage.log, "Directory " << fullPath(disk, to) << " (to which part must be renamed before removing) already exists." " Most likely this is due to unclean restart. Removing it."); try { - disk->removeRecursive(to_); + disk->removeRecursive(to); } catch (...) { - LOG_ERROR(storage.log, "Cannot recursively remove directory " << fullPath(disk, to_) << ". Exception: " << getCurrentExceptionMessage(false)); + LOG_ERROR(storage.log, "Cannot recursively remove directory " << fullPath(disk, to) << ". Exception: " << getCurrentExceptionMessage(false)); throw; } } try { - disk->moveFile(from_, to_); + disk->moveFile(from, to); } catch (const Poco::FileNotFoundException &) { - LOG_ERROR(storage.log, "Directory " << fullPath(disk, to_) << " (part to remove) doesn't exist or one of nested files has gone." + LOG_ERROR(storage.log, "Directory " << fullPath(disk, to) << " (part to remove) doesn't exist or one of nested files has gone." " Most likely this is due to manual removing. This should be discouraged. Ignoring."); return; @@ -741,37 +737,25 @@ void IMergeTreeDataPart::remove() const #endif std::shared_lock lock(columns_lock); - /// TODO: IDisk doesn't support `unlink()` and `rmdir()` functionality. - auto to = fullPath(disk, to_); - for (const auto & [file, _] : checksums.files) - { - String path_to_remove = to + "/" + file; - if (0 != unlink(path_to_remove.c_str())) - throwFromErrnoWithPath("Cannot unlink file " + path_to_remove, path_to_remove, ErrorCodes::CANNOT_UNLINK); - } + disk->remove(to + "/" + file); #if !__clang__ # pragma GCC diagnostic pop #endif for (const auto & file : {"checksums.txt", "columns.txt"}) - { - String path_to_remove = to + "/" + file; - if (0 != unlink(path_to_remove.c_str())) - throwFromErrnoWithPath("Cannot unlink file " + path_to_remove, path_to_remove, ErrorCodes::CANNOT_UNLINK); - } + disk->remove(to + "/" + file); - if (0 != rmdir(to.c_str())) - throwFromErrnoWithPath("Cannot rmdir file " + to, to, ErrorCodes::CANNOT_UNLINK); + disk->remove(to); } catch (...) { /// Recursive directory removal does many excessive "stat" syscalls under the hood. - LOG_ERROR(storage.log, "Cannot quickly remove directory " << fullPath(disk, to_) << " by removing files; fallback to recursive removal. Reason: " + LOG_ERROR(storage.log, "Cannot quickly remove directory " << fullPath(disk, to) << " by removing files; fallback to recursive removal. Reason: " << getCurrentExceptionMessage(false)); - disk->removeRecursive(to_ + "/"); + disk->removeRecursive(to + "/"); } } @@ -791,7 +775,7 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) { res = "detached/" + (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + DB::toString(try_no) : ""); - if (!Poco::File(storage.getFullPathOnDisk(disk) + res).exists()) + if (!disk->exists(getFullRelativePath() + res)) return res; LOG_WARNING(storage.log, "Directory " << res << " (to detach to) already exists." 
@@ -812,10 +796,8 @@ void IMergeTreeDataPart::makeCloneInDetached(const String & prefix) const assertOnDisk(); LOG_INFO(storage.log, "Detaching " << relative_path); - Poco::Path src(getFullPath()); - Poco::Path dst(storage.getFullPathOnDisk(disk) + getRelativePathForDetachedPart(prefix)); /// Backup is not recursive (max_level is 0), so do not copy inner directories - localBackup(src, dst, 0); + localBackup(disk, getFullRelativePath(), storage.relative_data_path + getRelativePathForDetachedPart(prefix), 0); } void IMergeTreeDataPart::makeCloneOnDiskDetached(const ReservationPtr & reservation) const @@ -825,14 +807,13 @@ void IMergeTreeDataPart::makeCloneOnDiskDetached(const ReservationPtr & reservat if (reserved_disk->getName() == disk->getName()) throw Exception("Can not clone data part " + name + " to same disk " + disk->getName(), ErrorCodes::LOGICAL_ERROR); - String path_to_clone = storage.getFullPathOnDisk(reserved_disk) + "detached/"; + String path_to_clone = storage.relative_data_path + "detached/"; - if (Poco::File(path_to_clone + relative_path).exists()) - throw Exception("Path " + path_to_clone + relative_path + " already exists. Can not clone ", ErrorCodes::DIRECTORY_ALREADY_EXISTS); - Poco::File(path_to_clone).createDirectory(); + if (reserved_disk->exists(path_to_clone + relative_path)) + throw Exception("Path " + fullPath(reserved_disk, path_to_clone + relative_path) + " already exists. Can not clone ", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + reserved_disk->createDirectory(path_to_clone); - Poco::File cloning_directory(getFullPath()); - cloning_directory.copyTo(path_to_clone); + disk->copy(getFullRelativePath(), reserved_disk, path_to_clone); } void IMergeTreeDataPart::checkConsistencyBase() const diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index adb106205de..2a22ca2dbd8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,50 +1,49 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include #include #include -#include -#include #include -#include -#include #include @@ -859,7 +858,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) const auto settings = getSettings(); std::vector> part_names_with_disks; Strings part_file_names; - Poco::DirectoryIterator end; auto disks = getStoragePolicy()->getDisks(); @@ -1318,7 +1316,7 @@ void MergeTreeData::rename( for (const auto & disk : disks) { - auto new_table_path_parent = Poco::Path(new_table_path).makeParent().toString(); + auto new_table_path_parent = parentPath(new_table_path); disk->createDirectory(new_table_path_parent); disk->moveDirectory(relative_data_path, new_table_path); } @@ -1713,8 +1711,8 @@ void MergeTreeData::alterDataPart( size_t num_files_to_modify = transaction->rename_map.size(); size_t num_files_to_remove = 0; - 
for (const auto & from_to : transaction->rename_map) - if (from_to.second.empty()) + for (const auto & [from, to] : transaction->rename_map) + if (to.empty()) ++num_files_to_remove; if (!skip_sanity_checks @@ -1732,18 +1730,18 @@ void MergeTreeData::alterDataPart( << ") files ("; bool first = true; - for (const auto & from_to : transaction->rename_map) + for (const auto & [from, to] : transaction->rename_map) { if (!first) exception_message << ", "; if (forbidden_because_of_modify) { - exception_message << "from " << backQuote(from_to.first) << " to " << backQuote(from_to.second); + exception_message << "from " << backQuote(from) << " to " << backQuote(to); first = false; } - else if (from_to.second.empty()) + else if (to.empty()) { - exception_message << backQuote(from_to.first); + exception_message << backQuote(from); first = false; } } @@ -1813,28 +1811,28 @@ void MergeTreeData::alterDataPart( /// Update the checksums. DataPart::Checksums new_checksums = part->checksums; - for (const auto & it : transaction->rename_map) + for (const auto & [from, to] : transaction->rename_map) { - if (it.second.empty()) - new_checksums.files.erase(it.first); + if (to.empty()) + new_checksums.files.erase(from); else - new_checksums.files[it.second] = add_checksums.files[it.first]; + new_checksums.files[to] = add_checksums.files[from]; } /// Write the checksums to the temporary file. if (!part->checksums.empty()) { transaction->new_checksums = new_checksums; - WriteBufferFromFile checksums_file(part->getFullPath() + "checksums.txt.tmp", 4096); - new_checksums.write(checksums_file); + auto checksums_file = part->disk->writeFile(part->getFullRelativePath() + "checksums.txt.tmp", 4096); + new_checksums.write(*checksums_file); transaction->rename_map["checksums.txt.tmp"] = "checksums.txt"; } /// Write the new column list to the temporary file. { transaction->new_columns = new_columns.filter(part->getColumns().getNames()); - WriteBufferFromFile columns_file(part->getFullPath() + "columns.txt.tmp", 4096); - transaction->new_columns.writeText(columns_file); + auto columns_file = part->disk->writeFile(part->getFullRelativePath() + "columns.txt.tmp", 4096); + transaction->new_columns.writeText(*columns_file); transaction->rename_map["columns.txt.tmp"] = "columns.txt"; } } @@ -1863,16 +1861,16 @@ void MergeTreeData::changeSettings( for (const String & disk_name : all_diff_disk_names) { - const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name)); - if (Poco::File(path).exists()) + auto disk = new_storage_policy->getDiskByName(disk_name); + if (disk->exists(relative_data_path)) throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); } for (const String & disk_name : all_diff_disk_names) { - const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name)); - Poco::File(path).createDirectories(); - Poco::File(path + "detached").createDirectory(); + auto disk = new_storage_policy->getDiskByName(disk_name); + disk->createDirectories(relative_data_path); + disk->createDirectories(relative_data_path + "detached"); } /// FIXME how would that be done while reloading configuration??? 
} @@ -1939,7 +1937,8 @@ void MergeTreeData::AlterDataPartTransaction::commit() { std::unique_lock lock(data_part->columns_lock); - String path = data_part->getFullPath(); + auto disk = data_part->disk; + String path = data_part->getFullRelativePath(); /// NOTE: checking that a file exists before renaming or deleting it /// is justified by the fact that, when converting an ordinary column @@ -1947,19 +1946,18 @@ void MergeTreeData::AlterDataPartTransaction::commit() /// before, i.e. they do not have older versions. /// 1) Rename the old files. - for (const auto & from_to : rename_map) + for (const auto & [from, to] : rename_map) { - String name = from_to.second.empty() ? from_to.first : from_to.second; - Poco::File file{path + name}; - if (file.exists()) - file.renameTo(path + name + ".tmp2"); + String name = to.empty() ? from : to; + if (disk->exists(path + name)) + disk->moveFile(path + name, path + name + ".tmp2"); } /// 2) Move new files in the place of old and update the metadata in memory. - for (const auto & from_to : rename_map) + for (const auto & [from, to] : rename_map) { - if (!from_to.second.empty()) - Poco::File{path + from_to.first}.renameTo(path + from_to.second); + if (!to.empty()) + disk->moveFile(path + from, path + to); } auto & mutable_part = const_cast(*data_part); @@ -1967,12 +1965,10 @@ void MergeTreeData::AlterDataPartTransaction::commit() mutable_part.setColumns(new_columns); /// 3) Delete the old files and drop required columns (DROP COLUMN) - for (const auto & from_to : rename_map) + for (const auto & [from, to] : rename_map) { - String name = from_to.second.empty() ? from_to.first : from_to.second; - Poco::File file{path + name + ".tmp2"}; - if (file.exists()) - file.remove(); + String name = to.empty() ? from : to; + disk->removeIfExists(path + name + ".tmp2"); } mutable_part.bytes_on_disk = new_checksums.getTotalSizeOnDisk(); @@ -2002,20 +1998,18 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction() { LOG_WARNING(data_part->storage.log, "Aborting ALTER of part " << data_part->relative_path); - String path = data_part->getFullPath(); - for (const auto & from_to : rename_map) + String path = data_part->getFullRelativePath(); + for (const auto & [from, to] : rename_map) { - if (!from_to.second.empty()) + if (!to.empty()) { try { - Poco::File file(path + from_to.first); - if (file.exists()) - file.remove(); + data_part->disk->removeIfExists(path + from); } catch (Poco::Exception & e) { - LOG_WARNING(data_part->storage.log, "Can't remove " << path + from_to.first << ": " << e.displayText()); + LOG_WARNING(data_part->storage.log, "Can't remove " << fullPath(data_part->disk, path + from) << ": " << e.displayText()); } } } @@ -2029,14 +2023,13 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction() void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const String & new_name) { old_and_new_names.push_back({old_name, new_name}); - const auto paths = storage.getDataPaths(); - for (const auto & full_path : paths) + for (const auto & [path, disk] : storage.getRelativeDataPathsWithDisks()) { - for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) + for (auto it = disk->iterateDirectory(path + source_dir); it->isValid(); it->next()) { - if (it.name() == old_name) + if (it->name() == old_name) { - old_part_name_to_full_path[old_name] = full_path; + old_part_name_to_path_and_disk[old_name] = {path, disk}; break; } } @@ -2050,11 +2043,12 @@ void 
MergeTreeData::PartsTemporaryRename::tryRenameAll() { try { - const auto & names = old_and_new_names[i]; - if (names.first.empty() || names.second.empty()) + const auto & [old_name, new_name] = old_and_new_names[i]; + if (old_name.empty() || new_name.empty()) throw DB::Exception("Empty part name. Most likely it's a bug.", ErrorCodes::INCORRECT_FILE_NAME); - const auto full_path = old_part_name_to_full_path[names.first] + source_dir; /// old_name - Poco::File(full_path + names.first).renameTo(full_path + names.second); + const auto & [path, disk] = old_part_name_to_path_and_disk[old_name]; + const auto full_path = path + source_dir; /// for old_name + disk->moveFile(full_path + old_name, full_path + new_name); } catch (...) { @@ -2070,15 +2064,16 @@ MergeTreeData::PartsTemporaryRename::~PartsTemporaryRename() // TODO what if server had crashed before this destructor was called? if (!renamed) return; - for (const auto & names : old_and_new_names) + for (const auto & [old_name, new_name] : old_and_new_names) { - if (names.first.empty()) + if (old_name.empty()) continue; try { - const auto full_path = old_part_name_to_full_path[names.first] + source_dir; /// old_name - Poco::File(full_path + names.second).renameTo(full_path + names.first); + const auto & [path, disk] = old_part_name_to_path_and_disk[old_name]; + const auto full_path = path + source_dir; /// for old_name + disk->moveFile(full_path + new_name, full_path + old_name); } catch (...) { @@ -2690,14 +2685,15 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) auto part_it = data_parts_indexes.insert(part_copy).first; modifyPartState(part_it, DataPartState::Committed); - Poco::Path marker_path(Poco::Path(original_active_part->getFullPath()), DELETE_ON_DESTROY_MARKER_PATH); + auto disk = original_active_part->disk; + String marker_path = original_active_part->getFullRelativePath() + DELETE_ON_DESTROY_MARKER_PATH; try { - Poco::File(marker_path).createFile(); + disk->createFile(marker_path); } catch (Poco::Exception & e) { - LOG_ERROR(log, e.what() << " (while creating DeleteOnDestroy marker: " + backQuote(marker_path.toString()) + ")"); + LOG_ERROR(log, e.what() << " (while creating DeleteOnDestroy marker: " + backQuote(fullPath(disk, marker_path)) + ")"); } return; } @@ -2754,15 +2750,16 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) { - String full_part_path = part->getFullPath(); + auto disk = part->disk; + String full_part_path = part->getFullRelativePath(); /// Earlier the list of columns was written incorrectly. Delete it and re-create. /// But in compact parts we can't get list of columns without this file. - if (isWidePart(part) && Poco::File(full_part_path + "columns.txt").exists()) - Poco::File(full_part_path + "columns.txt").remove(); + if (isWidePart(part)) + disk->removeIfExists(full_part_path + "columns.txt"); part->loadColumnsChecksumsIndexes(false, true); - part->modification_time = Poco::File(full_part_path).getLastModified().epochTime(); + part->modification_time = disk->getLastModified(full_part_path).epochTime(); /// If the checksums file is not present, calculate the checksums and write them to disk. /// Check the data while we are at it. 
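The hunk below completes this function by writing the recalculated checksums through the same `IDisk` interface: the contents go into `checksums.txt.tmp` first, and `disk->moveFile()` then renames the temporary over `checksums.txt`. This write-temporary-then-rename step is the pattern the patch uses whenever part metadata is replaced, so a crash cannot leave a half-written file behind. A minimal stand-alone sketch of the pattern, with `std::filesystem` and `std::ofstream` standing in for the `IDisk` calls (illustration only, not ClickHouse code):

```cpp
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

/// Write `contents` to `path` atomically: write a ".tmp" sibling first,
/// then rename it over the destination (a plain rename on POSIX), so readers
/// see either the old file or the new one, never a torn write.
void writeFileAtomically(const fs::path & path, const std::string & contents)
{
    fs::path tmp_path = path;
    tmp_path += ".tmp";

    {
        std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc);
        if (!out)
            throw std::runtime_error("Cannot open " + tmp_path.string());
        out << contents;
        out.flush();
        if (!out)
            throw std::runtime_error("Cannot write " + tmp_path.string());
    }

    fs::rename(tmp_path, path);   /// Replaces the destination if it already exists.
}

int main()
{
    writeFileAtomically("checksums.txt", "example contents\n");
    return 0;
}
```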
@@ -2770,11 +2767,11 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) { part->checksums = checkDataPart(part, false); { - WriteBufferFromFile out(full_part_path + "checksums.txt.tmp", 4096); - part->checksums.write(out); + auto out = disk->writeFile(full_part_path + "checksums.txt.tmp", 4096); + part->checksums.write(*out); } - Poco::File(full_part_path + "checksums.txt.tmp").renameTo(full_part_path + "checksums.txt"); + disk->moveFile(full_part_path + "checksums.txt.tmp", full_part_path + "checksums.txt"); } } @@ -3097,15 +3094,14 @@ MergeTreeData::getDetachedParts() const { std::vector res; - for (const auto & [path, disk] : getDataPathsWithDisks()) + for (const auto & [path, disk] : getRelativeDataPathsWithDisks()) { - for (Poco::DirectoryIterator it(path + "detached"); - it != Poco::DirectoryIterator(); ++it) + for (auto it = disk->iterateDirectory(path + "detached"); it->isValid(); it->next()) { res.emplace_back(); auto & part = res.back(); - DetachedPartInfo::tryParseDetachedPartName(it.name(), part, format_version); + DetachedPartInfo::tryParseDetachedPartName(it->name(), part, format_version); part.disk = disk->getName(); } } @@ -3117,9 +3113,9 @@ void MergeTreeData::validateDetachedPartName(const String & name) const if (name.find('/') != std::string::npos || name == "." || name == "..") throw DB::Exception("Invalid part name '" + name + "'", ErrorCodes::INCORRECT_FILE_NAME); - String full_path = getFullPathForPart(name, "detached/"); + auto full_path = getFullRelativePathForPart(name, "detached/"); - if (full_path.empty() || !Poco::File(full_path + name).exists()) + if (!full_path) throw DB::Exception("Detached part \"" + name + "\" not found" , ErrorCodes::BAD_DATA_PART_NAME); if (startsWith(name, "attaching_") || startsWith(name, "deleting_")) @@ -3154,7 +3150,8 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, const Cont for (auto & [old_name, new_name] : renamed_parts.old_and_new_names) { - Poco::File(renamed_parts.old_part_name_to_full_path[old_name] + "detached/" + new_name).remove(true); + const auto & [path, disk] = renamed_parts.old_part_name_to_path_and_disk[old_name]; + disk->removeRecursive(path + "detached/" + new_name + "/"); LOG_DEBUG(log, "Dropped detached part " << old_name); old_name.clear(); } @@ -3182,12 +3179,11 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ActiveDataPartSet active_parts(format_version); const auto disks = getStoragePolicy()->getDisks(); - for (const DiskPtr & disk : disks) + for (auto & disk : disks) { - const auto full_path = getFullPathOnDisk(disk); - for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) + for (auto it = disk->iterateDirectory(relative_data_path + source_dir); it->isValid(); it->next()) { - const String & name = it.name(); + const String & name = it->name(); MergeTreePartInfo part_info; // TODO what if name contains "_tryN" suffix? /// Parts with prefix in name (e.g. attaching_1_3_3_0, deleting_1_3_3_0) will be ignored @@ -3208,10 +3204,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const String containing_part = active_parts.getContainingPart(name); if (!containing_part.empty() && containing_part != name) { - auto full_path = getFullPathOnDisk(disk); // TODO maybe use PartsTemporaryRename here? 
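`PartsTemporaryRename`, mentioned in the TODO above and reworked earlier in this patch, is essentially a rollback guard: parts are renamed to a temporary name (for example an `attaching_` prefix) up front, and if the surrounding operation never finishes, the destructor renames them back. A stand-alone sketch of that idea with `std::filesystem` (illustration only; the real class additionally remembers the path and disk of every renamed part, as the hunks above show):

```cpp
#include <filesystem>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

namespace fs = std::filesystem;

/// RAII guard in the spirit of PartsTemporaryRename: rename directories to a
/// temporary name and, unless commit() is called, rename them back on destruction.
class TemporaryRenameGuard
{
public:
    explicit TemporaryRenameGuard(fs::path base_dir_) : base_dir(std::move(base_dir_)) {}

    void rename(const std::string & old_name, const std::string & new_name)
    {
        fs::rename(base_dir / old_name, base_dir / new_name);
        renamed.emplace_back(old_name, new_name);
    }

    void commit() { renamed.clear(); }   /// Keep the new names.

    ~TemporaryRenameGuard()
    {
        /// Roll back in reverse order; errors are swallowed because destructors must not throw.
        for (auto it = renamed.rbegin(); it != renamed.rend(); ++it)
        {
            std::error_code ec;
            fs::rename(base_dir / it->second, base_dir / it->first, ec);
        }
    }

private:
    fs::path base_dir;
    std::vector<std::pair<std::string, std::string>> renamed;
};

int main()
{
    fs::create_directories("/tmp/table/detached/0_1_1_0");   /// Toy detached part.

    TemporaryRenameGuard guard("/tmp/table/detached");
    guard.rename("0_1_1_0", "attaching_0_1_1_0");
    /// ... load and validate the part here; an early return would roll the rename back ...
    guard.commit();
    return 0;
}
```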
- Poco::File(full_path + source_dir + name) - .renameTo(full_path + source_dir + "inactive_" + name); + disk->moveDirectory(relative_data_path + source_dir + name, relative_data_path + source_dir + "inactive_" + name); } else renamed_parts.addPart(name, "attaching_" + name); @@ -3576,22 +3570,22 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto reservation = reserveSpace(src_part->bytes_on_disk, src_part->disk); - String dst_part_path = getFullPathOnDisk(reservation->getDisk()); - Poco::Path dst_part_absolute_path = Poco::Path(dst_part_path + tmp_dst_part_name).absolute(); - Poco::Path src_part_absolute_path = Poco::Path(src_part->getFullPath()).absolute(); + auto disk = reservation->getDisk(); + String src_part_path = src_part->getFullRelativePath(); + String dst_part_path = relative_data_path + tmp_dst_part_name; - if (Poco::File(dst_part_absolute_path).exists()) - throw Exception("Part in " + dst_part_absolute_path.toString() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + if (disk->exists(dst_part_path)) + throw Exception("Part in " + fullPath(disk, dst_part_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); - LOG_DEBUG(log, "Cloning part " << src_part_absolute_path.toString() << " to " << dst_part_absolute_path.toString()); - localBackup(src_part_absolute_path, dst_part_absolute_path); + LOG_DEBUG(log, "Cloning part " << fullPath(disk, src_part_path) << " to " << fullPath(disk, dst_part_path)); + localBackup(disk, src_part_path, dst_part_path); auto dst_data_part = createPart(dst_part_name, dst_part_info, reservation->getDisk(), tmp_dst_part_name); dst_data_part->is_temp = true; dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true); - dst_data_part->modification_time = Poco::File(dst_part_absolute_path).getLastModified().epochTime(); + dst_data_part->modification_time = disk->getLastModified(dst_part_path).epochTime(); return dst_data_part; } @@ -3601,26 +3595,25 @@ String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const } -DiskPtr MergeTreeData::getDiskForPart(const String & part_name, const String & relative_path) const +DiskPtr MergeTreeData::getDiskForPart(const String & part_name, const String & additional_path) const { const auto disks = getStoragePolicy()->getDisks(); + for (const DiskPtr & disk : disks) - { - const auto disk_path = getFullPathOnDisk(disk); - for (Poco::DirectoryIterator it = Poco::DirectoryIterator(disk_path + relative_path); it != Poco::DirectoryIterator(); ++it) - if (it.name() == part_name) + for (auto it = disk->iterateDirectory(relative_data_path + additional_path); it->isValid(); it->next()) + if (it->name() == part_name) return disk; - } + return nullptr; } -String MergeTreeData::getFullPathForPart(const String & part_name, const String & relative_path) const +std::optional MergeTreeData::getFullRelativePathForPart(const String & part_name, const String & additional_path) const { - auto disk = getDiskForPart(part_name, relative_path); + auto disk = getDiskForPart(part_name, additional_path); if (disk) - return getFullPathOnDisk(disk) + relative_path; - return ""; + return relative_data_path + additional_path; + return {}; } Strings MergeTreeData::getDataPaths() const @@ -3632,15 +3625,6 @@ Strings MergeTreeData::getDataPaths() const return res; } -MergeTreeData::PathsWithDisks MergeTreeData::getDataPathsWithDisks() const -{ - PathsWithDisks res; - auto disks = getStoragePolicy()->getDisks(); - for 
(const auto & disk : disks) - res.emplace_back(getFullPathOnDisk(disk), disk); - return res; -} - MergeTreeData::PathsWithDisks MergeTreeData::getRelativeDataPathsWithDisks() const { PathsWithDisks res; @@ -3657,6 +3641,8 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & Poco::File(default_shadow_path).createDirectories(); auto increment = Increment(default_shadow_path + "increment.txt").get(true); + const String shadow_path = "shadow/"; + /// Acquire a snapshot of active data parts to prevent removing while doing backup. const auto data_parts = getDataParts(); @@ -3666,9 +3652,8 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & if (!matcher(part)) continue; - String shadow_path = part->disk->getPath() + "shadow/"; + part->disk->createDirectories(shadow_path); - Poco::File(shadow_path).createDirectories(); String backup_path = shadow_path + (!with_name.empty() ? escapeForFileName(with_name) @@ -3677,11 +3662,8 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & LOG_DEBUG(log, "Freezing part " << part->name << " snapshot will be placed at " + backup_path); - String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); - String backup_part_absolute_path = backup_path - + relative_data_path - + part->relative_path; - localBackup(part_absolute_path, backup_part_absolute_path); + String backup_part_path = backup_path + relative_data_path + part->relative_path; + localBackup(part->disk, part->getFullRelativePath(), backup_part_path); part->is_frozen.store(true, std::memory_order_relaxed); ++parts_processed; } @@ -3853,11 +3835,10 @@ MergeTreeData::CurrentlyMovingPartsTagger MergeTreeData::checkPartsForMove(const throw Exception("Move is not possible. Not enough space on '" + space->getName() + "'", ErrorCodes::NOT_ENOUGH_SPACE); auto reserved_disk = reservation->getDisk(); - String path_to_clone = getFullPathOnDisk(reserved_disk); - if (Poco::File(path_to_clone + part->name).exists()) + if (reserved_disk->exists(relative_data_path + part->name)) throw Exception( - "Move is not possible: " + path_to_clone + part->name + " already exists", + "Move is not possible: " + fullPath(reserved_disk, relative_data_path + part->name) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); if (currently_moving_parts.count(part) || partIsAssignedToBackgroundOperation(part)) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 079fb316ffd..0606b2d9cec 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -263,6 +263,7 @@ public: }; using AlterDataPartTransactionPtr = std::unique_ptr; + using PathWithDisk = std::pair; struct PartsTemporaryRename : private boost::noncopyable { @@ -285,7 +286,7 @@ public: const MergeTreeData & storage; const String source_dir; std::vector> old_and_new_names; - std::unordered_map old_part_name_to_full_path; + std::unordered_map old_part_name_to_path_and_disk; bool renamed = false; }; @@ -670,18 +671,17 @@ public: /// Get table path on disk String getFullPathOnDisk(const DiskPtr & disk) const; - /// Get disk for part. Looping through directories on FS because some parts maybe not in - /// active dataparts set (detached) - DiskPtr getDiskForPart(const String & part_name, const String & relative_path = "") const; + /// Get disk where part is located. + /// `additional_path` can be set if part is not located directly in table data path (e.g. 
'detached/') + DiskPtr getDiskForPart(const String & part_name, const String & additional_path = "") const; - /// Get full path for part. Uses getDiskForPart and returns the full path - String getFullPathForPart(const String & part_name, const String & relative_path = "") const; + /// Get full path for part. Uses getDiskForPart and returns the full relative path. + /// `additional_path` can be set if part is not located directly in table data path (e.g. 'detached/') + std::optional getFullRelativePathForPart(const String & part_name, const String & additional_path = "") const; Strings getDataPaths() const override; - using PathWithDisk = std::pair; using PathsWithDisks = std::vector; - PathsWithDisks getDataPathsWithDisks() const; PathsWithDisks getRelativeDataPathsWithDisks() const; /// Reserves space at least 1MB. diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index cffc654ed55..9911796e2d5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -26,9 +26,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -576,10 +573,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor << parts.front()->name << " to " << parts.back()->name << " into " << TMP_PREFIX + future_part.name + " with type " + future_part.type.toString()); - String part_path = data.getFullPathOnDisk(space_reservation->getDisk()); + auto disk = space_reservation->getDisk(); + String part_path = data.relative_data_path; String new_part_tmp_path = part_path + TMP_PREFIX + future_part.name + "/"; - if (Poco::File(new_part_tmp_path).exists()) - throw Exception("Directory " + new_part_tmp_path + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + if (disk->exists(new_part_tmp_path)) + throw Exception("Directory " + fullPath(disk, new_part_tmp_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); MergeTreeData::DataPart::ColumnToSize merged_column_to_size; @@ -598,7 +596,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor future_part.name, future_part.type, future_part.part_info, - space_reservation->getDisk(), + disk, TMP_PREFIX + future_part.name); new_data_part->setColumns(all_columns); @@ -633,16 +631,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merge_entry->total_size_bytes_compressed, static_cast (merge_entry->total_size_bytes_compressed) / data.getTotalActiveSizeInBytes()); + /// TODO: Should it go through IDisk interface? 
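For readers unfamiliar with the `rows_sources` scratch file used below: during a vertical merge the first (horizontal) pass merges only the key columns and records, for every output row, the index of the source part it came from; the second pass then replays that map to gather each remaining column part by part, and the scratch file is removed at the end. A stand-alone sketch of that two-pass flow, with plain `std::fstream` replacing the compressed buffers and `IDisk` calls of the real code (illustration only):

```cpp
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    /// Two toy source "parts": a sorted key column and one payload column each.
    std::vector<std::vector<int>> keys = {{1, 4, 7}, {2, 3, 9}};
    std::vector<std::vector<std::string>> payload = {{"a", "b", "c"}, {"x", "y", "z"}};

    /// Horizontal pass: merge the key columns and record one byte per output row,
    /// the index of the part the row was taken from.
    {
        std::ofstream rows_sources("rows_sources", std::ios::binary | std::ios::trunc);
        size_t pos0 = 0, pos1 = 0;
        while (pos0 < keys[0].size() || pos1 < keys[1].size())
        {
            uint8_t source;
            if (pos1 == keys[1].size() || (pos0 < keys[0].size() && keys[0][pos0] <= keys[1][pos1]))
            {
                source = 0;
                ++pos0;
            }
            else
            {
                source = 1;
                ++pos1;
            }
            rows_sources.write(reinterpret_cast<const char *>(&source), 1);
        }
    }   /// Closing the stream plays the role of finalize(): the map must be fully
        /// on disk before the gathering pass reads it back.

    /// Vertical pass: replay the map to gather the payload column in merged order.
    std::ifstream rows_sources("rows_sources", std::ios::binary);
    std::vector<size_t> positions(2, 0);
    char source;
    while (rows_sources.read(&source, 1))
    {
        auto part = static_cast<size_t>(source);
        std::cout << payload[part][positions[part]++] << '\n';
    }

    std::remove("rows_sources");   /// Scratch file is deleted once all columns are gathered.
    return 0;
}
```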
String rows_sources_file_path; - std::unique_ptr rows_sources_uncompressed_write_buf; + std::unique_ptr rows_sources_uncompressed_write_buf; std::unique_ptr rows_sources_write_buf; std::optional column_sizes; if (merge_alg == MergeAlgorithm::Vertical) { - Poco::File(new_part_tmp_path).createDirectories(); + disk->createDirectories(new_part_tmp_path); rows_sources_file_path = new_part_tmp_path + "rows_sources"; - rows_sources_uncompressed_write_buf = std::make_unique(rows_sources_file_path); + rows_sources_uncompressed_write_buf = disk->writeFile(rows_sources_file_path); rows_sources_write_buf = std::make_unique(*rows_sources_uncompressed_write_buf); for (const MergeTreeData::DataPartPtr & part : parts) @@ -832,6 +831,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor rows_sources_write_buf->next(); rows_sources_uncompressed_write_buf->next(); + /// Ensure data has written to disk. + rows_sources_uncompressed_write_buf->finalize(); size_t rows_sources_count = rows_sources_write_buf->count(); /// In special case, when there is only one source part, and no rows were skipped, we may have @@ -842,7 +843,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor + ") differs from number of bytes written to rows_sources file (" + toString(rows_sources_count) + "). It is a bug.", ErrorCodes::LOGICAL_ERROR); - CompressedReadBufferFromFile rows_sources_read_buf(rows_sources_file_path, 0, 0, 0); + CompressedReadBufferFromFile rows_sources_read_buf(disk->readFile(rows_sources_file_path)); IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns; for (size_t column_num = 0, gathering_column_names_size = gathering_column_names.size(); @@ -909,7 +910,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merge_entry->progress.store(progress_before + column_sizes->columnWeight(column_name), std::memory_order_relaxed); } - Poco::File(rows_sources_file_path).remove(); + disk->remove(rows_sources_file_path); } for (const auto & part : parts) @@ -1018,7 +1019,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor new_data_part->index_granularity_info = source_part->index_granularity_info; new_data_part->setColumns(getColumnsForNewDataPart(source_part, updated_header, all_columns)); - String new_part_tmp_path = new_data_part->getFullPath(); + auto disk = new_data_part->disk; + String new_part_tmp_path = new_data_part->getFullRelativePath(); /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) is locked after part->columns_lock @@ -1029,7 +1031,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor source_part->bytes_on_disk, static_cast(source_part->bytes_on_disk) / data.getTotalActiveSizeInBytes()); - Poco::File(new_part_tmp_path).createDirectories(); + disk->createDirectories(new_part_tmp_path); /// Don't change granularity type while mutating subset of columns auto mrk_extension = source_part->index_granularity_info.is_adaptive ? 
getAdaptiveMrkExtension(new_data_part->getType()) @@ -1125,17 +1127,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor if (need_remove_expired_values) files_to_skip.insert("ttl.txt"); - Poco::DirectoryIterator dir_end; /// Create hardlinks for unchanged files - for (Poco::DirectoryIterator dir_it(source_part->getFullPath()); dir_it != dir_end; ++dir_it) + for (auto it = disk->iterateDirectory(source_part->getFullRelativePath()); it->isValid(); it->next()) { - if (files_to_skip.count(dir_it.name()) || files_to_remove.count(dir_it.name())) + if (files_to_skip.count(it->name()) || files_to_remove.count(it->name())) continue; - Poco::Path destination(new_part_tmp_path); - destination.append(dir_it.name()); + String destination = new_part_tmp_path + "/" + it->name(); - createHardLink(dir_it.path().toString(), destination.toString()); + disk->createHardLink(it->path(), destination); } merge_entry->columns_written = all_columns.size() - updated_header.columns(); @@ -1181,8 +1181,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor if (need_remove_expired_values) { /// Write a file with ttl infos in json format. - WriteBufferFromFile out_ttl(new_part_tmp_path + "ttl.txt", 4096); - HashingWriteBuffer out_hashing(out_ttl); + auto out_ttl = disk->writeFile(new_part_tmp_path + "ttl.txt", 4096); + HashingWriteBuffer out_hashing(*out_ttl); new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); new_data_part->checksums.files["ttl.txt"].file_hash = out_hashing.getHash(); @@ -1193,15 +1193,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor new_data_part->checksums.files.erase(removed_file); { /// Write file with checksums. - WriteBufferFromFile out_checksums(new_part_tmp_path + "checksums.txt", 4096); - new_data_part->checksums.write(out_checksums); + auto out_checksums = disk->writeFile(new_part_tmp_path + "checksums.txt", 4096); + new_data_part->checksums.write(*out_checksums); } /// close fd { /// Write a file with a description of columns. - WriteBufferFromFile out_columns(new_part_tmp_path + "columns.txt", 4096); - new_data_part->getColumns().writeText(out_columns); + auto out_columns = disk->writeFile(new_part_tmp_path + "columns.txt", 4096); + new_data_part->getColumns().writeText(*out_columns); } /// close new_data_part->rows_count = source_part->rows_count; diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 7d53f161620..bbd1fd6cbeb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -42,7 +42,6 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(SettingSeconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(SettingSeconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ - M(SettingBool, disable_background_merges, false, "Disable background merges.", 0) \ \ /** Inserts settings. 
*/ \ M(SettingUInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index 6195facc914..6da051d04ac 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -99,7 +99,6 @@ IMergeTreeDataPart::Checksums checkDataPart( throw Exception("Unknown type in part " + path, ErrorCodes::UNKNOWN_PART_TYPE); } - Poco::DirectoryIterator dir_end; for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) { const String & file_name = it->name(); diff --git a/dbms/src/Common/localBackup.cpp b/dbms/src/Storages/MergeTree/localBackup.cpp similarity index 51% rename from dbms/src/Common/localBackup.cpp rename to dbms/src/Storages/MergeTree/localBackup.cpp index 2e042351a90..7d7dacaeaf1 100644 --- a/dbms/src/Common/localBackup.cpp +++ b/dbms/src/Storages/MergeTree/localBackup.cpp @@ -1,13 +1,8 @@ #include "localBackup.h" -#include #include -#include -#include -#include #include -#include -#include +#include namespace DB @@ -20,7 +15,7 @@ namespace ErrorCodes } -static void localBackupImpl(const Poco::Path & source_path, const Poco::Path & destination_path, size_t level, +static void localBackupImpl(const DiskPtr & disk, const String & source_path, const String & destination_path, size_t level, std::optional max_level) { if (max_level && level > *max_level) @@ -29,34 +24,30 @@ static void localBackupImpl(const Poco::Path & source_path, const Poco::Path & d if (level >= 1000) throw DB::Exception("Too deep recursion", DB::ErrorCodes::TOO_DEEP_RECURSION); - Poco::File(destination_path).createDirectories(); + disk->createDirectories(destination_path); - Poco::DirectoryIterator dir_end; - for (Poco::DirectoryIterator dir_it(source_path); dir_it != dir_end; ++dir_it) + for (auto it = disk->iterateDirectory(source_path); it->isValid(); it->next()) { - Poco::Path source = dir_it.path(); - Poco::Path destination = destination_path; - destination.append(dir_it.name()); + auto source = it->path(); + auto destination = destination_path + "/" + it->name(); - if (!dir_it->isDirectory()) + if (!disk->isDirectory(source)) { - dir_it->setReadOnly(); - - createHardLink(source.toString(), destination.toString()); + disk->setReadOnly(source); + disk->createHardLink(source, destination); } else { - localBackupImpl(source, destination, level + 1, max_level); + localBackupImpl(disk, source, destination, level + 1, max_level); } } } -void localBackup(const Poco::Path & source_path, const Poco::Path & destination_path, std::optional max_level) +void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, std::optional max_level) { - if (Poco::File(destination_path).exists() - && Poco::DirectoryIterator(destination_path) != Poco::DirectoryIterator()) + if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) { - throw DB::Exception("Directory " + destination_path.toString() + " already exists and is not empty.", DB::ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw DB::Exception("Directory " + fullPath(disk, destination_path) + " already exists and is not empty.", DB::ErrorCodes::DIRECTORY_ALREADY_EXISTS); } size_t try_no = 0; @@ -70,7 +61,7 @@ void localBackup(const Poco::Path & source_path, const Poco::Path & destination_ { try { - localBackupImpl(source_path, destination_path, 0, max_level); + 
localBackupImpl(disk, source_path, destination_path, 0, max_level); } catch (const DB::ErrnoException & e) { diff --git a/dbms/src/Common/localBackup.h b/dbms/src/Storages/MergeTree/localBackup.h similarity index 80% rename from dbms/src/Common/localBackup.h rename to dbms/src/Storages/MergeTree/localBackup.h index e3ea32614ee..3c9d92fa9da 100644 --- a/dbms/src/Common/localBackup.h +++ b/dbms/src/Storages/MergeTree/localBackup.h @@ -1,8 +1,8 @@ #pragma once #include - -namespace Poco { class Path; } +#include +#include namespace DB { @@ -20,6 +20,6 @@ namespace DB * If max_level is specified, than only files which depth relative source_path less or equal max_level will be copied. * So, if max_level=0 than only direct file child are copied. */ -void localBackup(const Poco::Path & source_path, const Poco::Path & destination_path, std::optional max_level = {}); +void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, std::optional max_level = {}); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index aa4566fef37..d62ff1ca5cd 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -21,8 +21,6 @@ #include #include #include -#include -#include #include #include #include @@ -95,8 +93,7 @@ void StorageMergeTree::startup() /// NOTE background task will also do the above cleanups periodically. time_after_previous_cleanup.restart(); - if (!getSettings()->disable_background_merges) - merging_mutating_task_handle = global_context.getBackgroundPool().addTask([this] { return mergeMutateTask(); }); + merging_mutating_task_handle = global_context.getBackgroundPool().addTask([this] { return mergeMutateTask(); }); if (areBackgroundMovesNeeded()) moving_task_handle = global_context.getBackgroundMovePool().addTask([this] { return movePartsTask(); }); } diff --git a/dbms/tests/integration/test_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml b/dbms/tests/integration/test_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml new file mode 100644 index 00000000000..a756c4434ea --- /dev/null +++ b/dbms/tests/integration/test_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml @@ -0,0 +1,5 @@ + + 0.5 + 0.5 + 0.5 + diff --git a/dbms/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml b/dbms/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..5b292446c6b --- /dev/null +++ b/dbms/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml @@ -0,0 +1,28 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + local + / + + + + + +
    + s3 + + + hdd + + + + + +
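The configuration fragment above provisions an S3-backed disk and a local disk named `hdd` for the test_merge_tree_s3 integration test. As a rough sketch of how such a layout is exercised (the table schema and the `hdd` disk name follow the test added later in this patch; the sample row and the exact invocation are illustrative assumptions):

``` bash
# Insert a row into the MergeTree table whose default storage is the S3-backed disk,
# then move the partition onto the local 'hdd' disk and read the data back.
clickhouse-client --query "INSERT INTO s3_test VALUES ('2020-01-04', 1, 'sample')"
clickhouse-client --query "ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'"
clickhouse-client --query "SELECT count() FROM s3_test FORMAT Values"
```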
    diff --git a/dbms/tests/integration/test_merge_tree_s3/configs/config.d/users.xml b/dbms/tests/integration/test_merge_tree_s3/configs/config.d/users.xml new file mode 100644 index 00000000000..a13b24b278d --- /dev/null +++ b/dbms/tests/integration/test_merge_tree_s3/configs/config.d/users.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/dbms/tests/integration/test_merge_tree_s3/configs/config.xml b/dbms/tests/integration/test_merge_tree_s3/configs/config.xml index 63b4d951eb7..24b7344df3a 100644 --- a/dbms/tests/integration/test_merge_tree_s3/configs/config.xml +++ b/dbms/tests/integration/test_merge_tree_s3/configs/config.xml @@ -1,25 +1,5 @@ - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - - - s3 - http://minio1:9001/root/data/ - minio - minio123 - - - - - 9000 127.0.0.1 diff --git a/dbms/tests/integration/test_merge_tree_s3/configs/users.xml b/dbms/tests/integration/test_merge_tree_s3/configs/users.xml deleted file mode 100644 index 6061af8e33d..00000000000 --- a/dbms/tests/integration/test_merge_tree_s3/configs/users.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - ::/0 - - default - default - - - - - - - - diff --git a/dbms/tests/integration/test_merge_tree_s3/test.py b/dbms/tests/integration/test_merge_tree_s3/test.py index c79745642a0..631d69911ff 100644 --- a/dbms/tests/integration/test_merge_tree_s3/test.py +++ b/dbms/tests/integration/test_merge_tree_s3/test.py @@ -1,6 +1,7 @@ import logging import random import string +import time import pytest from helpers.cluster import ClickHouseCluster @@ -36,56 +37,212 @@ def cluster(): cluster.shutdown() +FILES_OVERHEAD = 1 +FILES_OVERHEAD_PER_COLUMN = 2 # Data and mark files +FILES_OVERHEAD_PER_PART_WIDE = FILES_OVERHEAD_PER_COLUMN * 3 + 2 + 6 +FILES_OVERHEAD_PER_PART_COMPACT = 10 + + def random_string(length): letters = string.ascii_letters return ''.join(random.choice(letters) for i in range(length)) -def generate_values(date_str, count): - data = [[date_str, i, random_string(10)] for i in range(count)] +def generate_values(date_str, count, sign=1): + data = [[date_str, sign*(i + 1), random_string(10)] for i in range(count)] data.sort(key=lambda tup: tup[1]) return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data]) -@pytest.mark.parametrize( - "min_rows_for_wide_part,files_overhead,files_per_part", - [ - (0, 1, 14), - (8192, 1, 10) - ] -) -def test_log_family_s3(cluster, min_rows_for_wide_part, files_overhead, files_per_part): +def create_table(cluster, additional_settings=None): node = cluster.instances["node"] - minio = cluster.minio_client - node.query( - """ + create_table_statement = """ CREATE TABLE s3_test( dt Date, - id UInt64, + id Int64, data String, INDEX min_max (id) TYPE minmax GRANULARITY 3 ) ENGINE=MergeTree() PARTITION BY dt ORDER BY (dt, id) - SETTINGS disable_background_merges='true', index_granularity=512, min_rows_for_wide_part={} + SETTINGS + old_parts_lifetime=0, index_granularity=512 """ - .format(min_rows_for_wide_part) - ) - assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 1 - values1 = generate_values('2020-01-03', 4096) - node.query("INSERT INTO s3_test VALUES {}".format(values1)) - assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1 - assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == files_overhead + files_per_part + if additional_settings: + create_table_statement += "," + create_table_statement += additional_settings - 
values2 = generate_values('2020-01-04', 4096) - node.query("INSERT INTO s3_test VALUES {}".format(values2)) - assert node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values") == values1 + "," + values2 - assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == files_overhead + 2 * files_per_part + node.query(create_table_statement) - assert node.query("SELECT count(*) FROM s3_test where id = 0 FORMAT Values") == "(2)" + +@pytest.fixture(autouse=True) +def drop_table(cluster): + yield + node = cluster.instances["node"] + minio = cluster.minio_client node.query("DROP TABLE s3_test") assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + +@pytest.mark.parametrize( + "min_rows_for_wide_part,files_per_part", + [ + (0, FILES_OVERHEAD_PER_PART_WIDE), + (8192, FILES_OVERHEAD_PER_PART_COMPACT) + ] +) +def test_simple_insert_select(cluster, min_rows_for_wide_part, files_per_part): + create_table(cluster, "min_rows_for_wide_part={}".format(min_rows_for_wide_part)) + + node = cluster.instances["node"] + minio = cluster.minio_client + + values1 = generate_values('2020-01-03', 4096) + node.query("INSERT INTO s3_test VALUES {}".format(values1)) + assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1 + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + files_per_part + + values2 = generate_values('2020-01-04', 4096) + node.query("INSERT INTO s3_test VALUES {}".format(values2)) + assert node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values") == values1 + "," + values2 + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + files_per_part*2 + + assert node.query("SELECT count(*) FROM s3_test where id = 1 FORMAT Values") == "(2)" + + +@pytest.mark.parametrize( + "merge_vertical", [False, True] +) +def test_insert_same_partition_and_merge(cluster, merge_vertical): + settings = None + if merge_vertical: + settings = """ + vertical_merge_algorithm_min_rows_to_activate=0, + vertical_merge_algorithm_min_columns_to_activate=0 + """ + create_table(cluster, settings) + + node = cluster.instances["node"] + minio = cluster.minio_client + + node.query("SYSTEM STOP MERGES s3_test") + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 1024))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 2048))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 1024, -1))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 2048, -1))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096, -1))) + assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" + assert node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD_PER_PART_WIDE*6 + FILES_OVERHEAD + + node.query("SYSTEM START MERGES s3_test") + # Wait for merges and old parts deletion + time.sleep(3) + + assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" + assert node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD + + +def test_alter_table_columns(cluster): + create_table(cluster) + + node = 
cluster.instances["node"] + minio = cluster.minio_client + + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096, -1))) + + node.query("ALTER TABLE s3_test ADD COLUMN col1 UInt64 DEFAULT 1") + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3_test") + + # Wait for merges, mutations and old parts deletion + time.sleep(3) + + assert node.query("SELECT sum(col1) FROM s3_test FORMAT Values") == "(8192)" + assert node.query("SELECT sum(col1) FROM s3_test WHERE id > 0 FORMAT Values") == "(4096)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN + + node.query("ALTER TABLE s3_test MODIFY COLUMN col1 String") + assert node.query("SELECT distinct(col1) FROM s3_test FORMAT Values") == "('1')" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN + + node.query("ALTER TABLE s3_test DROP COLUMN col1") + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + + +def test_attach_detach_partition(cluster): + create_table(cluster) + + node = cluster.instances["node"] + minio = cluster.minio_client + + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096))) + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 + + node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-03'") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 + + node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 + + node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + + node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-04'") + node.query("SET allow_drop_detached=1; ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + + +def test_move_partition(cluster): + create_table(cluster) + + node = cluster.instances["node"] + minio = cluster.minio_client + + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096))) + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 + + node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == 
"(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + + +def test_table_manipulations(cluster): + create_table(cluster) + + node = cluster.instances["node"] + minio = cluster.minio_client + + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096))) + + node.query("RENAME TABLE s3_test TO s3_renamed") + assert node.query("SELECT count(*) FROM s3_renamed FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 + node.query("RENAME TABLE s3_renamed TO s3_test") + + # TODO: Doesn't work with min_max index. + #assert node.query("SET check_query_single_value_result='false'; CHECK TABLE s3_test FORMAT Values") == "(1)" + + node.query("DETACH TABLE s3_test") + node.query("ATTACH TABLE s3_test") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 + + node.query("TRUNCATE TABLE s3_test") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp index 8f1ca05dd32..51ea87d35b9 100644 --- a/utils/convert-month-partitioned-parts/main.cpp +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -1,13 +1,13 @@ -#include -#include #include +#include #include #include -#include -#include -#include #include #include +#include +#include +#include +#include #include #include @@ -73,7 +73,7 @@ void run(String part_path, String date_column, String dest_path) { /// If the file is already deleted, do nothing. } - localBackup(old_part_path, new_tmp_part_path, {}); + localBackup(disk, old_part_path.toString(), new_tmp_part_path.toString(), {}); WriteBufferFromFile count_out(new_tmp_part_path_str + "count.txt", 4096); HashingWriteBuffer count_out_hashing(count_out); From 175c95675be3679acca51be75704df1f6d266900 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 19 Mar 2020 20:18:33 +0300 Subject: [PATCH 113/115] Try fix tests. --- .../Executors/TreeExecutorBlockInputStream.cpp | 8 ++++++-- dbms/src/Processors/QueryPipeline.cpp | 10 +++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index c07499b28dc..e0242533518 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -240,8 +240,7 @@ void TreeExecutorBlockInputStream::initRowsBeforeLimit() if (auto * source = typeid_cast(processor)) sources.emplace_back(source); } - - if (auto * sorting = typeid_cast(processor)) + else if (auto * sorting = typeid_cast(processor)) { if (!rows_before_limit_at_least) rows_before_limit_at_least = std::make_shared(); @@ -269,6 +268,11 @@ void TreeExecutorBlockInputStream::initRowsBeforeLimit() for (auto & source : sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } + + /// If there is a limit, then enable rows_before_limit_at_least + /// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result. 
+ if (!limit_transforms.empty()) + rows_before_limit_at_least->add(0); } Block TreeExecutorBlockInputStream::readImpl() diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp index 60ea824ad5c..fe7d466cbe6 100644 --- a/dbms/src/Processors/QueryPipeline.cpp +++ b/dbms/src/Processors/QueryPipeline.cpp @@ -592,8 +592,7 @@ void QueryPipeline::initRowsBeforeLimit() if (auto * source = typeid_cast(processor)) sources.emplace_back(source); } - - if (auto * sorting = typeid_cast(processor)) + else if (auto * sorting = typeid_cast(processor)) { if (!rows_before_limit_at_least) rows_before_limit_at_least = std::make_shared(); @@ -601,7 +600,7 @@ void QueryPipeline::initRowsBeforeLimit() sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least); /// Don't go to children. Take rows_before_limit from last PartialSortingTransform. - /// continue; + continue; } /// Skip totals and extremes port for output format. @@ -633,6 +632,11 @@ void QueryPipeline::initRowsBeforeLimit() source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } + /// If there is a limit, then enable rows_before_limit_at_least + /// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result. + if (!limits.empty()) + rows_before_limit_at_least->add(0); + if (rows_before_limit_at_least) output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); } From 1a3e5cfcc53e7ae1d1d6d56557341afc5211b2dc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Mar 2020 20:31:49 +0300 Subject: [PATCH 114/115] Fixed a test that may taint other tests if timed out --- .../1_stateful/00152_insert_different_granularity.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/1_stateful/00152_insert_different_granularity.sql b/dbms/tests/queries/1_stateful/00152_insert_different_granularity.sql index e690daadb6e..5ca34bbe48e 100644 --- a/dbms/tests/queries/1_stateful/00152_insert_different_granularity.sql +++ b/dbms/tests/queries/1_stateful/00152_insert_different_granularity.sql @@ -8,7 +8,8 @@ INSERT INTO fixed_granularity_table SELECT * FROM test.hits LIMIT 10; -- should INSERT INTO fixed_granularity_table SELECT * FROM test.hits LIMIT 10; -OPTIMIZE TABLE fixed_granularity_table FINAL; -- and even after optimize +-- We have removed testing of OPTIMIZE because it's too heavy on very slow builds (debug + coverage + thread fuzzer with sleeps) +-- OPTIMIZE TABLE fixed_granularity_table FINAL; -- and even after optimize DETACH TABLE fixed_granularity_table; @@ -39,7 +40,7 @@ ALTER TABLE test.hits DETACH PARTITION 201403; ALTER TABLE test.hits ATTACH PARTITION 201403; -OPTIMIZE TABLE test.hits; +-- OPTIMIZE TABLE test.hits; SELECT count() FROM test.hits; From 03aa7894d9484f4c6dce471e5c1806574f31bc38 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 19 Mar 2020 20:49:27 +0300 Subject: [PATCH 115/115] Draft of docs translation helper (#9755) * replace exit with assert in test_single_page * improve save_raw_single_page docs option * More grammar fixes * "Built from" link in new tab * fix mistype * Example of include in docs * add anchor to meeting form * Draft of translation helper * WIP on translation helper * Replace some fa docs content with machine translation --- .gitignore | 2 + docs/en/roadmap.md | 2 +- .../example_datasets/metrica.md | 64 +- docs/fa/getting_started/tutorial.md | 662 +++++++++++++++++- docs/tools/translate.py | 21 - docs/tools/translate/filter.py | 118 ++++ .../translate/replace-with-translation.sh | 12 
+ docs/tools/translate/translate.sh | 16 + 8 files changed, 873 insertions(+), 24 deletions(-) mode change 120000 => 100644 docs/fa/getting_started/example_datasets/metrica.md mode change 120000 => 100644 docs/fa/getting_started/tutorial.md delete mode 100755 docs/tools/translate.py create mode 100755 docs/tools/translate/filter.py create mode 100755 docs/tools/translate/replace-with-translation.sh create mode 100755 docs/tools/translate/translate.sh diff --git a/.gitignore b/.gitignore index 5f41164e3f7..6aa331edc84 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ /docs/website /docs/venv/ /docs/tools/venv/ +/docs/tools/translate/venv/ +/docs/tools/translate/output.md /docs/en/single.md /docs/ru/single.md /docs/zh/single.md diff --git a/docs/en/roadmap.md b/docs/en/roadmap.md index 0001e2a0529..212673c1f3d 100644 --- a/docs/en/roadmap.md +++ b/docs/en/roadmap.md @@ -10,4 +10,4 @@ - Resource pools for more precise distribution of cluster capacity between users -[Original article](https://clickhouse.tech/docs/en/roadmap/) +{## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##} diff --git a/docs/fa/getting_started/example_datasets/metrica.md b/docs/fa/getting_started/example_datasets/metrica.md deleted file mode 120000 index 984023973eb..00000000000 --- a/docs/fa/getting_started/example_datasets/metrica.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/getting_started/example_datasets/metrica.md \ No newline at end of file diff --git a/docs/fa/getting_started/example_datasets/metrica.md b/docs/fa/getting_started/example_datasets/metrica.md new file mode 100644 index 00000000000..0675f4c1fa9 --- /dev/null +++ b/docs/fa/getting_started/example_datasets/metrica.md @@ -0,0 +1,63 @@ +# ناشناس یاندکس.اطلاعات متریکا + +مجموعه داده شامل دو جدول حاوی داده های ناشناس در مورد بازدید (`hits_v1`) و بازدیدکننده داشته است (`visits_v1`) یاندکس . متریکا شما می توانید اطلاعات بیشتر در مورد یاندکس به عنوان خوانده شده.متریکا در [تاریخچه کلیک](../../introduction/history.md) بخش. + +مجموعه داده ها شامل دو جدول است که هر کدام می توانند به عنوان یک فشرده دانلود شوند `tsv.xz` فایل و یا به عنوان پارتیشن تهیه شده است. علاوه بر این, یک نسخه طولانی از `hits` جدول حاوی 100 میلیون ردیف به عنوان تسو در دسترس است https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz و به عنوان پارتیشن تهیه شده در https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz. 
+ +## اخذ جداول از پارتیشن های تهیه شده + +دانلود و وارد کردن جدول بازدید: + +``` bash +curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar +tar xvf hits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory +# check permissions on unpacked data, fix if required +sudo service clickhouse-server restart +clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1" +``` + +دانلود و وارد کردن بازدیدکننده داشته است: + +``` bash +curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar +tar xvf visits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory +# check permissions on unpacked data, fix if required +sudo service clickhouse-server restart +clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" +``` + +## اخذ جداول از فایل تسو فشرده + +دانلود و وارد کردن بازدید از فایل تسو فشرده: + +``` bash +curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv +# now create table +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets" +clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign 
String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192" +# import data +cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000 +# optionally you can optimize table +clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL" +clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1" +``` + +دانلود و واردات بازدیدکننده داشته است از فشرده فایل: + +``` bash +curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv +# now create table +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets" +clickhouse-client --query "CREATE TABLE datasets.visits_v1 ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType 
UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)" +# import data +cat visits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.visits_v1 FORMAT TSV" --max_insert_block_size=100000 +# optionally you can optimize table +clickhouse-client --query "OPTIMIZE TABLE datasets.visits_v1 FINAL" +clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" +``` + +## به عنوان مثال نمایش داده شد + +[اموزش کلیک](../../getting_started/tutorial.md) است در یاندکس بر اساس.مجموعه داده های متریکا و راه توصیه شده برای شروع این مجموعه داده ها فقط از طریق تدریس خصوصی است. + +نمونه های اضافی از نمایش داده شد به این جداول را می توان در میان یافت [تست های نفرت انگیز](https://github.com/ClickHouse/ClickHouse/tree/master/dbms/tests/queries/1_stateful) از کلیک هاوس (به نام `test.hists` و `test.visits` وجود دارد). diff --git a/docs/fa/getting_started/tutorial.md b/docs/fa/getting_started/tutorial.md deleted file mode 120000 index 8bc40816ab2..00000000000 --- a/docs/fa/getting_started/tutorial.md +++ /dev/null @@ -1 +0,0 @@ -../../en/getting_started/tutorial.md \ No newline at end of file diff --git a/docs/fa/getting_started/tutorial.md b/docs/fa/getting_started/tutorial.md new file mode 100644 index 00000000000..a5590799133 --- /dev/null +++ b/docs/fa/getting_started/tutorial.md @@ -0,0 +1,661 @@ +# اموزش کلیک + +## چه انتظار از این مقاله? + +با رفتن را از طریق این مقاله شما یاد بگیرند که چگونه به راه اندازی پایه خوشه خانه رعیتی, این کوچک, اما مقاوم در برابر خطا و مقیاس پذیر. ما از یکی از مجموعه داده های نمونه برای پر کردن داده ها و اجرای برخی از نمایش های نسخه ی نمایشی استفاده خواهیم کرد. + +## راه اندازی تک گره + +برای به تعویق انداختن پیچیدگی از محیط توزیع, ما با استقرار کلیک بر روی یک سرور و یا ماشین مجازی شروع. خانه کلیک است که معمولا از نصب [دب](index.md#install-from-deb-packages) یا [دور در دقیقه](index.md#from-rpm-packages) بسته, اما وجود دارد [جایگزین ها](index.md#from-docker-image) برای سیستم عامل هایی که هیچ پشتیبانی نمی کنند. 
+ +مثلا, شما را انتخاب کرده اند `deb` بسته ها و اعدام: + +``` bash +sudo apt-get install dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb http://repo.clickhouse.tech/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client +``` + +در بسته هایی که نصب شده اند چه چیزی داریم: + +- `clickhouse-client` بسته شامل [کلیک مشتری](../interfaces/cli.md) کاربرد, تعاملی مشتری کنسول تاتر. +- `clickhouse-common` بسته شامل یک فایل اجرایی کلیک. +- `clickhouse-server` بسته شامل فایل های پیکربندی برای اجرای تاتر به عنوان یک سرور. + +فایل های پیکربندی سرور در واقع `/etc/clickhouse-server/`. قبل از رفتن بیشتر لطفا توجه کنید `` عنصر در `config.xml`. مسیر تعیین محل ذخیره سازی داده ها, بنابراین باید در حجم با ظرفیت دیسک بزرگ واقع, مقدار پیش فرض است `/var/lib/clickhouse/`. اگر شما می خواهید برای تنظیم پیکربندی دستی به طور مستقیم ویرایش نیست `config.xml` فایل, با توجه به اینکه ممکن است در به روز رسانی بسته های بعدی بازنویسی. راه توصیه می شود به نادیده گرفتن عناصر پیکربندی است که برای ایجاد [فایل ها در پیکربندی.فهرست راهنما](../operations/configuration_files.md) که به عنوان خدمت می کنند “patches” برای پیکربندی.. + +همانطور که شما ممکن است متوجه, `clickhouse-server` به طور خودکار پس از نصب بسته راه اندازی نشده است. این به طور خودکار پس از به روز رسانی هم دوباره راه اندازی نخواهد شد. راه شما شروع به سرور بستگی به سیستم اینیت خود را, معمولا, این: + +``` bash +sudo service clickhouse-server start +``` + +یا + +``` bash +sudo /etc/init.d/clickhouse-server start +``` + +محل پیش فرض برای سیاهههای مربوط به سرور است `/var/log/clickhouse-server/`. سرور خواهد بود برای رسیدگی به اتصالات مشتری یک بار `Ready for connections` پیام وارد شد. + +هنگامی که `clickhouse-server` است و در حال اجرا, ما می توانیم با استفاده از `clickhouse-client` برای اتصال به سرور و اجرای برخی از نمایش داده شد تست مانند `SELECT "Hello, world!";`. + +
    + +راهنمایی سریع برای کلیک-مشتری حالت تعاملی: + +``` bash +clickhouse-client +clickhouse-client --host=... --port=... --user=... --password=... +``` + +فعالسازی پرسشهای چند خطی: + +``` bash +clickhouse-client -m +clickhouse-client --multiline +``` + +نمایش داده شد اجرا در دسته حالت: + +``` bash +clickhouse-client --query='SELECT 1' +echo 'SELECT 1' | clickhouse-client +clickhouse-client <<< 'SELECT 1' +``` + +درج داده از یک پرونده در قالب مشخص شده: + +``` bash +clickhouse-client --query='INSERT INTO table VALUES' < data.txt +clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv +``` + +
    + +## واردات مجموعه داده نمونه + +در حال حاضر زمان برای پر کردن سرور کلیک ما با برخی از داده های نمونه است. در این مقاله ما داده های ناشناس یاندکس را استفاده خواهیم کرد.متریکا, اولین سرویس اجرا می شود که کلیک در راه تولید قبل از منبع باز شد (بیشتر در که در [بخش تاریخچه](../introduction/history.md)). وجود دارد [راه های متعدد برای وارد کردن یاندکس.مجموعه داده های متریکا](example_datasets/metrica.md) و به خاطر اموزش, ما با یکی از واقع بینانه ترین رفتن. + +### دانلود و استخراج داده های جدول + +``` bash +curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv +curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv +``` + +فایل های استخراج شده حدود 10 گیگابایت است. + +### ایجاد جداول + +جداول منطقی به گروه بندی می شوند “databases”. یک `default` پایگاه داده, اما ما یکی از جدید به نام ایجاد `tutorial`: + +``` bash +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial" +``` + +نحو برای ایجاد جداول راه پیچیده تر در مقایسه با پایگاه داده است (نگاه کنید به [مرجع](../query_language/create.md). به طور کلی `CREATE TABLE` بیانیه باید سه چیز کلیدی را مشخص کند: + +1. نام جدول برای ایجاد. +2. طرحواره جدول, به عنوان مثال لیستی از ستون ها و خود [انواع داده ها](../data_types/index.md). +3. [موتور جدول](../operations/table_engines/index.md) و این تنظیمات است, که تعیین تمام اطلاعات در مورد نحوه نمایش داده شد به این جدول خواهد شد از لحاظ جسمی اجرا. + +یاندکسمتریکا یک سرویس تجزیه و تحلیل وب است و مجموعه داده نمونه قابلیت های کامل خود را پوشش نمی دهد بنابراین تنها دو جدول برای ایجاد وجود دارد: + +- `hits` یک جدول با هر عمل انجام شده توسط همه کاربران در تمام وب سایت های تحت پوشش این سرویس است. +- `visits` یک جدول است که شامل جلسات از پیش ساخته شده به جای اقدامات فردی است. 
+ +بیایید ببینید و اجرای واقعی ایجاد نمایش داده شد جدول برای این جداول: + +``` sql +CREATE TABLE tutorial.hits_v1 +( + `WatchID` UInt64, + `JavaEnable` UInt8, + `Title` String, + `GoodEvent` Int16, + `EventTime` DateTime, + `EventDate` Date, + `CounterID` UInt32, + `ClientIP` UInt32, + `ClientIP6` FixedString(16), + `RegionID` UInt32, + `UserID` UInt64, + `CounterClass` Int8, + `OS` UInt8, + `UserAgent` UInt8, + `URL` String, + `Referer` String, + `URLDomain` String, + `RefererDomain` String, + `Refresh` UInt8, + `IsRobot` UInt8, + `RefererCategories` Array(UInt16), + `URLCategories` Array(UInt16), + `URLRegions` Array(UInt32), + `RefererRegions` Array(UInt32), + `ResolutionWidth` UInt16, + `ResolutionHeight` UInt16, + `ResolutionDepth` UInt8, + `FlashMajor` UInt8, + `FlashMinor` UInt8, + `FlashMinor2` String, + `NetMajor` UInt8, + `NetMinor` UInt8, + `UserAgentMajor` UInt16, + `UserAgentMinor` FixedString(2), + `CookieEnable` UInt8, + `JavascriptEnable` UInt8, + `IsMobile` UInt8, + `MobilePhone` UInt8, + `MobilePhoneModel` String, + `Params` String, + `IPNetworkID` UInt32, + `TraficSourceID` Int8, + `SearchEngineID` UInt16, + `SearchPhrase` String, + `AdvEngineID` UInt8, + `IsArtifical` UInt8, + `WindowClientWidth` UInt16, + `WindowClientHeight` UInt16, + `ClientTimeZone` Int16, + `ClientEventTime` DateTime, + `SilverlightVersion1` UInt8, + `SilverlightVersion2` UInt8, + `SilverlightVersion3` UInt32, + `SilverlightVersion4` UInt16, + `PageCharset` String, + `CodeVersion` UInt32, + `IsLink` UInt8, + `IsDownload` UInt8, + `IsNotBounce` UInt8, + `FUniqID` UInt64, + `HID` UInt32, + `IsOldCounter` UInt8, + `IsEvent` UInt8, + `IsParameter` UInt8, + `DontCountHits` UInt8, + `WithHash` UInt8, + `HitColor` FixedString(1), + `UTCEventTime` DateTime, + `Age` UInt8, + `Sex` UInt8, + `Income` UInt8, + `Interests` UInt16, + `Robotness` UInt8, + `GeneralInterests` Array(UInt16), + `RemoteIP` UInt32, + `RemoteIP6` FixedString(16), + `WindowName` Int32, + `OpenerName` Int32, + `HistoryLength` Int16, + `BrowserLanguage` FixedString(2), + `BrowserCountry` FixedString(2), + `SocialNetwork` String, + `SocialAction` String, + `HTTPError` UInt16, + `SendTiming` Int32, + `DNSTiming` Int32, + `ConnectTiming` Int32, + `ResponseStartTiming` Int32, + `ResponseEndTiming` Int32, + `FetchTiming` Int32, + `RedirectTiming` Int32, + `DOMInteractiveTiming` Int32, + `DOMContentLoadedTiming` Int32, + `DOMCompleteTiming` Int32, + `LoadEventStartTiming` Int32, + `LoadEventEndTiming` Int32, + `NSToDOMContentLoadedTiming` Int32, + `FirstPaintTiming` Int32, + `RedirectCount` Int8, + `SocialSourceNetworkID` UInt8, + `SocialSourcePage` String, + `ParamPrice` Int64, + `ParamOrderID` String, + `ParamCurrency` FixedString(3), + `ParamCurrencyID` UInt16, + `GoalsReached` Array(UInt32), + `OpenstatServiceName` String, + `OpenstatCampaignID` String, + `OpenstatAdID` String, + `OpenstatSourceID` String, + `UTMSource` String, + `UTMMedium` String, + `UTMCampaign` String, + `UTMContent` String, + `UTMTerm` String, + `FromTag` String, + `HasGCLID` UInt8, + `RefererHash` UInt64, + `URLHash` UInt64, + `CLID` UInt32, + `YCLID` UInt64, + `ShareService` String, + `ShareURL` String, + `ShareTitle` String, + `ParsedParams` Nested( + Key1 String, + Key2 String, + Key3 String, + Key4 String, + Key5 String, + ValueDouble Float64), + `IslandID` FixedString(16), + `RequestNum` UInt32, + `RequestTry` UInt8 +) +ENGINE = MergeTree() +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +SETTINGS 
index_granularity = 8192 +``` + +``` sql +CREATE TABLE tutorial.visits_v1 +( + `CounterID` UInt32, + `StartDate` Date, + `Sign` Int8, + `IsNew` UInt8, + `VisitID` UInt64, + `UserID` UInt64, + `StartTime` DateTime, + `Duration` UInt32, + `UTCStartTime` DateTime, + `PageViews` Int32, + `Hits` Int32, + `IsBounce` UInt8, + `Referer` String, + `StartURL` String, + `RefererDomain` String, + `StartURLDomain` String, + `EndURL` String, + `LinkURL` String, + `IsDownload` UInt8, + `TraficSourceID` Int8, + `SearchEngineID` UInt16, + `SearchPhrase` String, + `AdvEngineID` UInt8, + `PlaceID` Int32, + `RefererCategories` Array(UInt16), + `URLCategories` Array(UInt16), + `URLRegions` Array(UInt32), + `RefererRegions` Array(UInt32), + `IsYandex` UInt8, + `GoalReachesDepth` Int32, + `GoalReachesURL` Int32, + `GoalReachesAny` Int32, + `SocialSourceNetworkID` UInt8, + `SocialSourcePage` String, + `MobilePhoneModel` String, + `ClientEventTime` DateTime, + `RegionID` UInt32, + `ClientIP` UInt32, + `ClientIP6` FixedString(16), + `RemoteIP` UInt32, + `RemoteIP6` FixedString(16), + `IPNetworkID` UInt32, + `SilverlightVersion3` UInt32, + `CodeVersion` UInt32, + `ResolutionWidth` UInt16, + `ResolutionHeight` UInt16, + `UserAgentMajor` UInt16, + `UserAgentMinor` UInt16, + `WindowClientWidth` UInt16, + `WindowClientHeight` UInt16, + `SilverlightVersion2` UInt8, + `SilverlightVersion4` UInt16, + `FlashVersion3` UInt16, + `FlashVersion4` UInt16, + `ClientTimeZone` Int16, + `OS` UInt8, + `UserAgent` UInt8, + `ResolutionDepth` UInt8, + `FlashMajor` UInt8, + `FlashMinor` UInt8, + `NetMajor` UInt8, + `NetMinor` UInt8, + `MobilePhone` UInt8, + `SilverlightVersion1` UInt8, + `Age` UInt8, + `Sex` UInt8, + `Income` UInt8, + `JavaEnable` UInt8, + `CookieEnable` UInt8, + `JavascriptEnable` UInt8, + `IsMobile` UInt8, + `BrowserLanguage` UInt16, + `BrowserCountry` UInt16, + `Interests` UInt16, + `Robotness` UInt8, + `GeneralInterests` Array(UInt16), + `Params` Array(String), + `Goals` Nested( + ID UInt32, + Serial UInt32, + EventTime DateTime, + Price Int64, + OrderID String, + CurrencyID UInt32), + `WatchIDs` Array(UInt64), + `ParamSumPrice` Int64, + `ParamCurrency` FixedString(3), + `ParamCurrencyID` UInt16, + `ClickLogID` UInt64, + `ClickEventID` Int32, + `ClickGoodEvent` Int32, + `ClickEventTime` DateTime, + `ClickPriorityID` Int32, + `ClickPhraseID` Int32, + `ClickPageID` Int32, + `ClickPlaceID` Int32, + `ClickTypeID` Int32, + `ClickResourceID` Int32, + `ClickCost` UInt32, + `ClickClientIP` UInt32, + `ClickDomainID` UInt32, + `ClickURL` String, + `ClickAttempt` UInt8, + `ClickOrderID` UInt32, + `ClickBannerID` UInt32, + `ClickMarketCategoryID` UInt32, + `ClickMarketPP` UInt32, + `ClickMarketCategoryName` String, + `ClickMarketPPName` String, + `ClickAWAPSCampaignName` String, + `ClickPageName` String, + `ClickTargetType` UInt16, + `ClickTargetPhraseID` UInt64, + `ClickContextType` UInt8, + `ClickSelectType` Int8, + `ClickOptions` String, + `ClickGroupBannerID` Int32, + `OpenstatServiceName` String, + `OpenstatCampaignID` String, + `OpenstatAdID` String, + `OpenstatSourceID` String, + `UTMSource` String, + `UTMMedium` String, + `UTMCampaign` String, + `UTMContent` String, + `UTMTerm` String, + `FromTag` String, + `HasGCLID` UInt8, + `FirstVisit` DateTime, + `PredLastVisit` Date, + `LastVisit` Date, + `TotalVisits` UInt32, + `TraficSource` Nested( + ID Int8, + SearchEngineID UInt16, + AdvEngineID UInt8, + PlaceID UInt16, + SocialSourceNetworkID UInt8, + Domain String, + SearchPhrase String, + SocialSourcePage String), + 
`Attendance` FixedString(16), + `CLID` UInt32, + `YCLID` UInt64, + `NormalizedRefererHash` UInt64, + `SearchPhraseHash` UInt64, + `RefererDomainHash` UInt64, + `NormalizedStartURLHash` UInt64, + `StartURLDomainHash` UInt64, + `NormalizedEndURLHash` UInt64, + `TopLevelDomain` UInt64, + `URLScheme` UInt64, + `OpenstatServiceNameHash` UInt64, + `OpenstatCampaignIDHash` UInt64, + `OpenstatAdIDHash` UInt64, + `OpenstatSourceIDHash` UInt64, + `UTMSourceHash` UInt64, + `UTMMediumHash` UInt64, + `UTMCampaignHash` UInt64, + `UTMContentHash` UInt64, + `UTMTermHash` UInt64, + `FromHash` UInt64, + `WebVisorEnabled` UInt8, + `WebVisorActivity` UInt32, + `ParsedParams` Nested( + Key1 String, + Key2 String, + Key3 String, + Key4 String, + Key5 String, + ValueDouble Float64), + `Market` Nested( + Type UInt8, + GoalID UInt32, + OrderID String, + OrderPrice Int64, + PP UInt32, + DirectPlaceID UInt32, + DirectOrderID UInt32, + DirectBannerID UInt32, + GoodID String, + GoodName String, + GoodQuantity Int32, + GoodPrice Int64), + `IslandID` FixedString(16) +) +ENGINE = CollapsingMergeTree(Sign) +PARTITION BY toYYYYMM(StartDate) +ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) +SAMPLE BY intHash32(UserID) +SETTINGS index_granularity = 8192 +``` + +شما می توانید این پرسش ها را با استفاده از حالت تعاملی اجرا کنید `clickhouse-client` (فقط در یک ترمینال راه اندازی بدون مشخص کردن یک پرس و جو در پیش) و یا سعی کنید برخی از [رابط جایگزین](../interfaces/index.md) اگر شما می خواهید. + +همانطور که می بینیم, `hits_v1` با استفاده از [موتور ادغام عمومی](../operations/table_engines/mergetree.md) در حالی که `visits_v1` با استفاده از [سقوط](../operations/table_engines/collapsingmergetree.md) گزینه. + +### وارد کردن داده + +وارد کردن داده ها به تاتر از طریق انجام می شود [وارد](../query_language/insert_into.md) پرس و جو مانند در بسیاری از پایگاه داده های دیگر گذاشتن. با این حال داده ها معمولا در یکی از [فرمت های پشتیبانی شده](../interfaces/formats.md) به جای `VALUES` بند (که همچنین پشتیبانی). + +فایل هایی که قبلا دانلود کردیم در قالب تب جدا شده اند بنابراین در اینجا نحوه وارد کردن از طریق مشتری کنسول است: + +``` bash +clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv +clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv +``` + +تاتر است که بسیاری از [تنظیمات برای تنظیم](../operations/settings/index.md) و یک راه برای مشخص کردن انها در کنسول مشتری از طریق استدلال است همانطور که ما می توانید ببینید با `--max_insert_block_size`. ساده ترین راه برای کشف کردن چه تنظیمات در دسترس هستند, چه معنی می دهند و چه پیش فرض است به پرس و جو `system.settings` جدول: + +``` sql +SELECT name, value, changed, description +FROM system.settings +WHERE name LIKE '%max_insert_b%' +FORMAT TSV + +max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." +``` + +در صورت تمایل شما می توانید [بهینه سازی](../query_language/misc/#misc_operations-optimize) جداول پس از واردات. جداول که با ادغام پیکربندی-موتور خانواده همیشه ادغام قطعات داده ها در پس زمینه برای بهینه سازی ذخیره سازی داده ها (یا حداقل چک کنید اگر حس می کند). 
این نمایش داده شد فقط موتور جدول را مجبور به انجام بهینه سازی ذخیره سازی در حال حاضر به جای برخی از زمان بعد: + +``` bash +clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" +clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL" +``` + +این عملیات فشرده من / و پردازنده است بنابراین اگر جدول به طور مداوم داده های جدید را دریافت کند بهتر است تنها بماند و اجازه دهید ادغام در پس زمینه اجرا شود. + +در حال حاضر ما می توانید بررسی کنید که جداول با موفقیت وارد شده است: + +``` bash +clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1" +clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1" +``` + +## به عنوان مثال نمایش داده شد + +``` sql +SELECT + StartURL AS URL, + AVG(Duration) AS AvgDuration +FROM tutorial.visits_v1 +WHERE StartDate BETWEEN '2014-03-23' AND '2014-03-30' +GROUP BY URL +ORDER BY AvgDuration DESC +LIMIT 10 +``` + +``` sql +SELECT + sum(Sign) AS visits, + sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits, + (100. * goal_visits) / visits AS goal_percent +FROM tutorial.visits_v1 +WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru') +``` + +## استقرار خوشه + +خوشه کلیک یک خوشه همگن است. مراحل برای راه اندازی: + +1. نصب سرور کلیک بر روی تمام ماشین های خوشه +2. تنظیم پیکربندی خوشه در فایل های پیکربندی +3. ایجاد جداول محلی در هر نمونه +4. ایجاد یک [جدول توزیع شده](../operations/table_engines/distributed.md) + +[جدول توزیع شده](../operations/table_engines/distributed.md) در واقع یک نوع از “view” به جداول محلی خوشه فاحشه خانه. پرس و جو را انتخاب کنید از یک جدول توزیع خواهد شد با استفاده از منابع خرده ریز تمام خوشه اجرا. شما ممکن است تنظیمات برای خوشه های متعدد مشخص و ایجاد جداول توزیع های متعدد فراهم کردن دیدگاه ها به خوشه های مختلف. + +به عنوان مثال پیکربندی برای یک خوشه با سه خرده ریز, یک ماکت هر: + +``` xml + + + + + example-perftest01j.yandex.ru + 9000 + + + + + example-perftest02j.yandex.ru + 9000 + + + + + example-perftest03j.yandex.ru + 9000 + + + + +``` + +برای تظاهرات بیشتر بیایید ایجاد یک جدول محلی جدید با همان `CREATE TABLE` پرس و جو که ما برای استفاده `hits_v1`, اما نام جدول های مختلف: + +``` sql +CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ... +``` + +ایجاد یک جدول توزیع شده برای نمایش در جداول محلی خوشه: + +``` sql +CREATE TABLE tutorial.hits_all AS tutorial.hits_local +ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); +``` + +یک روش معمول این است که جداول توزیع شده مشابه را در تمام ماشین های خوشه ایجاد کنید. این اجازه می دهد در حال اجرا نمایش داده شد توزیع در هر دستگاه از خوشه. همچنین یک گزینه جایگزین برای ایجاد جدول توزیع موقت برای پرس و جو انتخاب داده شده با استفاده از وجود دارد [دور](../query_language/table_functions/remote.md) تابع جدول. + +بیا فرار کنیم [درج را انتخاب کنید](../query_language/insert_into.md) به جدول توزیع شده برای گسترش جدول به چندین سرور. + +``` sql +INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; +``` + +!!! اخطار “Notice” این روش مناسب برای جلوگیری از جداول بزرگ نیست. یک ابزار جداگانه وجود دارد [تاتر-کپی](../operations/utils/clickhouse-copier.md) که می تواند جداول دلخواه بزرگ دوباره سفال. + +همانطور که شما می توانید انتظار نمایش داده شد محاسباتی سنگین نفر بار سریع تر اجرا در 3 سرور به جای یک راه اندازی. + +در این مورد, ما یک خوشه با استفاده کرده اند 3 خرده ریز هر شامل یک ماکت تک. + +برای انعطاف پذیری در یک محیط تولید توصیه می کنیم که هر سفال باید حاوی 2-3 کپی بین مراکز داده های متعدد توزیع شده است. توجه داشته باشید که کلیک خانه پشتیبانی از تعداد نامحدودی از کپی. 
+ +به عنوان مثال پیکربندی برای یک خوشه از یک سفال حاوی سه کپی: + +``` xml + + ... + + + + example-perftest01j.yandex.ru + 9000 + + + example-perftest02j.yandex.ru + 9000 + + + example-perftest03j.yandex.ru + 9000 + + + + +``` + +برای فعال کردن تکثیر بومی باغ وحش الزامی است. تاتر مراقبت از سازگاری داده ها در تمام کپی را اجرا و بازگرداندن روش پس از شکست بطور خودکار توصیه می شود برای استقرار خوشه باغ وحش به سرور جداگانه. + +باغ وحش یک نیاز سخت نیست: در برخی موارد ساده می توانید داده ها را با نوشتن به تمام کپی ها از کد درخواست خود کپی کنید. این رویکرد است **نه** توصیه می شود, در این مورد, تاتر قادر نخواهد بود به تضمین سازگاری داده ها در تمام کپی. این وظیفه درخواست شما باقی می ماند. + +مکان های باغ وحش باید در فایل پیکربندی مشخص شود: + +``` xml + + + zoo01.yandex.ru + 2181 + + + zoo02.yandex.ru + 2181 + + + zoo03.yandex.ru + 2181 + + +``` + +همچنین, ما نیاز به تنظیم ماکروها برای شناسایی هر سفال و ماکت, خواهد شد در ایجاد جدول استفاده می شود: + +``` xml + + 01 + 01 + +``` + +اگر هیچ کپی در حال حاضر در ایجاد جدول تکرار وجود دارد, اولین ماکت جدید نمونه خواهد شد. اگر در حال حاضر زندگی می کنند کپی جدید کلون کردن داده ها از موجود. شما ابتدا یک گزینه برای ایجاد تمام جداول تکرار شده دارید و داده ها را وارد می کنید. یکی دیگر از گزینه این است که برای ایجاد برخی از کپی و اضافه کردن دیگران بعد یا در هنگام درج داده ها. + +``` sql +CREATE TABLE tutorial.hits_replica (...) +ENGINE = ReplcatedMergeTree( + '/clickhouse_perftest/tables/{shard}/hits', + '{replica}' +) +... +``` + +در اینجا ما با استفاده از [تکرار غذای اصلی](../operations/table_engines/replication.md) موتور جدول. در پارامترهای مشخص می کنیم مسیر باغ وحش حاوی سفال و کپی شناسه. + +``` sql +INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; +``` + +تکرار عمل در حالت چند استاد. داده ها را می توان به هر ماکت بارگذاری کرد و به طور خودکار با سایر موارد همگام سازی می شود. تکرار ناهمزمان است بنابراین در یک لحظه معین, همه کپی ممکن است حاوی داده به تازگی قرار داده شده. اجازه می دهد تا درج داده ها حداقل یک ماکت باید باشد. دیگران همگام سازی داده ها و قوام تعمیر هنگامی که دوباره فعال تبدیل خواهد شد. لطفا توجه داشته باشید که چنین رویکردی اجازه می دهد تا برای امکان کم از دست دادن داده ها فقط اضافه. 
+ +[مقاله اصلی](https://clickhouse.tech/docs/en/getting_started/tutorial/) diff --git a/docs/tools/translate.py b/docs/tools/translate.py deleted file mode 100755 index 621fc37af19..00000000000 --- a/docs/tools/translate.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -import sys -import pprint - -import googletrans -import pandocfilters - -translator = googletrans.Translator() - -def translate(key, value, format, _): - if key == 'Str': - print(value.encode('utf8'), file=sys.stderr) - return - [meta, contents] = value - cls = getattr(pandocfilters, key) - return cls(meta, translator.translate(contents, dest='es')) - -if __name__ == "__main__": - pandocfilters.toJSONFilter(translate) diff --git a/docs/tools/translate/filter.py b/docs/tools/translate/filter.py new file mode 100755 index 00000000000..f8c048f0871 --- /dev/null +++ b/docs/tools/translate/filter.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 + +import os +import random +import sys +import time +import json.decoder +import urllib.parse + +import googletrans +import pandocfilters +import requests + +translator = googletrans.Translator() +target_language = os.environ.get('TARGET_LANGUAGE', 'ru') +is_debug = os.environ.get('DEBUG') is not None +is_yandex = os.environ.get('YANDEX') is not None + + +def debug(*args): + if is_debug: + print(*args, file=sys.stderr) + + +def translate(text): + if target_language == 'en': + return text + else: + if is_yandex: + text = urllib.parse.quote(text) + url = f'http://translate.yandex.net/api/v1/tr.json/translate?srv=docs&lang=en-{target_language}&text={text}' + result = requests.get(url).json() + debug(result) + if result.get('code') == 200: + return result['text'][0] + else: + print('Failed to translate', str(result), file=sys.stderr) + sys.exit(1) + else: + time.sleep(random.random()) + return translator.translate(text, target_language).text + + +def process_buffer(buffer, new_value, item=None): + if buffer: + text = ''.join(buffer) + + try: + translated_text = translate(text) + except TypeError: + translated_text = text + except json.decoder.JSONDecodeError as e: + print('Failed to translate', str(e), file=sys.stderr) + sys.exit(1) + + debug('Translate', text, ' -> ', translated_text) + + if text and text[0].isupper() and not translated_text[0].isupper(): + translated_text = translated_text[0].upper() + translated_text[1:] + + if text.startswith(' ') and not translated_text.startswith(' '): + translated_text = ' ' + translated_text + + if text.endswith(' ') and not translated_text.endswith(' '): + translated_text = translated_text + ' ' + + for token in translated_text.split(' '): + new_value.append(pandocfilters.Str(token)) + new_value.append(pandocfilters.Space()) + + if item is None and len(new_value): + new_value.pop(len(new_value) - 1) + else: + new_value[-1] = item + elif item: + new_value.append(item) + + +def process_sentence(value): + new_value = [] + buffer = [] + for item in value: + t = item.get('t') + c = item.get('c') + if t == 'Str': + buffer.append(c) + elif t == 'Space': + buffer.append(' ') + elif t == 'DoubleQuote': + buffer.append('"') + else: + process_buffer(buffer, new_value, item) + buffer = [] + process_buffer(buffer, new_value) + return new_value + + +def translate_filter(key, value, _format, _): + debug(key, value) + try: + cls = getattr(pandocfilters, key) + except AttributeError: + return + + if key == 'Para' or key == 'Plain' or key == 'Strong' or key == 'Emph': + return cls(process_sentence(value)) + elif key == 
'Link': + value[1] = process_sentence(value[1]) + return cls(*value) + elif key == 'Header': + value[2] = process_sentence(value[2]) + return cls(*value) + + return + + +if __name__ == "__main__": + pandocfilters.toJSONFilter(translate_filter) diff --git a/docs/tools/translate/replace-with-translation.sh b/docs/tools/translate/replace-with-translation.sh new file mode 100755 index 00000000000..79c99bf4723 --- /dev/null +++ b/docs/tools/translate/replace-with-translation.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Usage: replace-with-translation.sh +set -e +BASE_DIR=$(dirname $(readlink -f $0)) +TEMP_FILE=$(mktemp) +trap 'rm -f -- "${TEMP_FILE}"' INT TERM HUP EXIT +TARGET_LANGUAGE="$1" +INPUT="$2" +cat "${INPUT}" > "${TEMP_FILE}" +git rm "${INPUT}" +YANDEX=1 "${BASE_DIR}/translate.sh" "${TARGET_LANGUAGE}" "${TEMP_FILE}" "${INPUT}" +git add "${INPUT}" diff --git a/docs/tools/translate/translate.sh b/docs/tools/translate/translate.sh new file mode 100755 index 00000000000..b33a206cae4 --- /dev/null +++ b/docs/tools/translate/translate.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Usage: translate.sh +set -e +BASE_DIR=$(dirname $(readlink -f $0)) +OUTPUT=${3:-/dev/stdout} +export TARGET_LANGUAGE="$1" +export DEBUG +TEMP_FILE=$(mktemp) +trap 'rm -f -- "${TEMP_FILE}"' INT TERM HUP EXIT +source "${BASE_DIR}/venv/bin/activate" +pandoc "$2" --filter "${BASE_DIR}/filter.py" -o "${TEMP_FILE}" \ + -f markdown -t "markdown_strict+pipe_tables+markdown_attribute+all_symbols_escapable+backtick_code_blocks" \ + --atx-headers --wrap=none +perl -pi -e 's/{\\#\\#/{##/g' "${TEMP_FILE}" +perl -pi -e 's/\\#\\#}/##}/g' "${TEMP_FILE}" +cat "${TEMP_FILE}" > "${OUTPUT}"
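For reference, a usage sketch of the translation helpers added above. The invocations are inferred from the script bodies (`$1` is the target language, `$2` the input file, and translate.sh takes an optional third argument for the output path); the patch itself does not document them, so treat the examples as assumptions:

``` bash
# Translate one English page and print the result to stdout.
docs/tools/translate/translate.sh fa docs/en/getting_started/tutorial.md

# Replace a docs page with its machine translation in place and stage the change in git;
# this appears to be how the docs/fa pages in this patch were produced.
docs/tools/translate/replace-with-translation.sh fa docs/fa/getting_started/tutorial.md
```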