Merge branch 'master' into keeper-upload-snapshot-to-s3

This commit is contained in:
Antonio Andelic 2022-09-21 11:54:44 +00:00
commit 448f8184f3
21 changed files with 288 additions and 217 deletions

View File

@ -1,8 +1,8 @@
---
slug: /en/operations/opentelemetry
sidebar_position: 62
sidebar_label: OpenTelemetry Support
title: "[experimental] OpenTelemetry Support"
sidebar_label: Tracing ClickHouse with OpenTelemetry
title: "[experimental] Tracing ClickHouse with OpenTelemetry"
---
[OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from distributed applications. ClickHouse has some support for OpenTelemetry.

View File

@ -12,6 +12,16 @@
#include <Common/OvercommitTracker.h>
#include <Common/logger_useful.h>
#include "config_core.h"
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
#endif
#include <atomic>
#include <cmath>
#include <random>
@ -84,6 +94,7 @@ static constexpr size_t log_peak_memory_usage_every = 1ULL << 30;
MemoryTracker total_memory_tracker(nullptr, VariableContext::Global);
std::atomic<Int64> MemoryTracker::free_memory_in_allocator_arenas;
/// Creates a tracker at the given level whose parent is the global total_memory_tracker.
MemoryTracker::MemoryTracker(VariableContext level_) : parent(&total_memory_tracker), level(level_) {}
/// Creates a tracker at the given level with an explicitly supplied parent.
MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) : parent(parent_), level(level_) {}
@ -128,6 +139,16 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
if (MemoryTrackerBlockerInThread::isBlocked(level))
{
if (level == VariableContext::Global)
{
/// For global memory tracker always update memory usage.
amount.fetch_add(size, std::memory_order_relaxed);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::add(metric_loaded, size);
}
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->allocImpl(size, throw_if_memory_exceeded,
@ -148,24 +169,6 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
Int64 current_hard_limit = hard_limit.load(std::memory_order_relaxed);
Int64 current_profiler_limit = profiler_limit.load(std::memory_order_relaxed);
/// Cap the limit to the total_memory_tracker, since it may include some drift
/// for user-level memory tracker.
///
/// And since total_memory_tracker is reset to the process resident
/// memory periodically (in AsynchronousMetrics::update()), any limit can be
/// capped to it, to avoid possible drift.
if (unlikely(current_hard_limit
&& will_be > current_hard_limit
&& level == VariableContext::User))
{
Int64 total_amount = total_memory_tracker.get();
if (amount > total_amount)
{
set(total_amount);
will_be = size + total_amount;
}
}
bool memory_limit_exceeded_ignored = false;
bool allocation_traced = false;
@ -211,8 +214,30 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
memory_limit_exceeded_ignored = true;
}
Int64 limit_to_check = current_hard_limit;
if (unlikely(current_hard_limit && will_be > current_hard_limit))
#if USE_JEMALLOC
if (level == VariableContext::Global)
{
/// Jemalloc arenas may keep some extra memory.
/// This memory was subtracted from RSS to decrease memory drift.
/// In case memory is close to the limit, try to purge the arenas.
/// This is needed to avoid OOM, because some allocations are directly done with mmap.
Int64 current_free_memory_in_allocator_arenas = free_memory_in_allocator_arenas.load(std::memory_order_relaxed);
if (current_free_memory_in_allocator_arenas > 0 && current_hard_limit && current_free_memory_in_allocator_arenas + will_be > current_hard_limit)
{
if (free_memory_in_allocator_arenas.exchange(-current_free_memory_in_allocator_arenas) > 0)
{
mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
}
}
limit_to_check += abs(current_free_memory_in_allocator_arenas);
}
#endif
if (unlikely(current_hard_limit && will_be > limit_to_check))
{
if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded)
{
@ -311,6 +336,15 @@ void MemoryTracker::free(Int64 size)
{
if (MemoryTrackerBlockerInThread::isBlocked(level))
{
if (level == VariableContext::Global)
{
/// For global memory tracker always update memory usage.
amount.fetch_sub(size, std::memory_order_relaxed);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::sub(metric_loaded, size);
}
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->free(size);
@ -325,7 +359,7 @@ void MemoryTracker::free(Int64 size)
}
Int64 accounted_size = size;
if (level == VariableContext::Thread)
if (level == VariableContext::Thread || level == VariableContext::Global)
{
/// Could become negative if memory allocated in this thread is freed in another one
amount.fetch_sub(accounted_size, std::memory_order_relaxed);
@ -396,12 +430,18 @@ void MemoryTracker::reset()
}
void MemoryTracker::set(Int64 to)
void MemoryTracker::setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_)
{
amount.store(to, std::memory_order_relaxed);
Int64 new_amount = rss_; // - free_memory_in_allocator_arenas_;
total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed);
free_memory_in_allocator_arenas.store(free_memory_in_allocator_arenas_, std::memory_order_relaxed);
auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::set(metric_loaded, new_amount);
bool log_memory_usage = true;
updatePeak(to, log_memory_usage);
total_memory_tracker.updatePeak(rss_, log_memory_usage);
}

View File

@ -56,6 +56,8 @@ private:
std::atomic<Int64> hard_limit {0};
std::atomic<Int64> profiler_limit {0};
static std::atomic<Int64> free_memory_in_allocator_arenas;
Int64 profiler_step = 0;
/// To test exception safety of calling code, memory tracker throws an exception on each memory allocation with specified probability.
@ -199,8 +201,10 @@ public:
/// Reset the accumulated data.
void reset();
/// Reset current counter to a new value.
void set(Int64 to);
/// Reset current counter to an RSS value.
/// Jemalloc may have pre-allocated arenas, they are accounted in RSS.
/// We can free these arenas in case of exception to avoid OOM.
static void setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_);
/// Prints info about peak memory consumption into log.
void logPeakMemoryUsage();

View File

@ -33,13 +33,6 @@
# include <jemalloc/jemalloc.h>
#endif
namespace CurrentMetrics
{
extern const Metric MemoryTracking;
}
namespace DB
{
@ -393,7 +386,7 @@ uint64_t updateJemallocEpoch()
}
template <typename Value>
static void saveJemallocMetricImpl(AsynchronousMetricValues & values,
static Value saveJemallocMetricImpl(AsynchronousMetricValues & values,
const std::string & jemalloc_full_name,
const std::string & clickhouse_full_name)
{
@ -401,22 +394,23 @@ static void saveJemallocMetricImpl(AsynchronousMetricValues & values,
size_t size = sizeof(value);
mallctl(jemalloc_full_name.c_str(), &value, &size, nullptr, 0);
values[clickhouse_full_name] = value;
return value;
}
template<typename Value>
static void saveJemallocMetric(AsynchronousMetricValues & values,
static Value saveJemallocMetric(AsynchronousMetricValues & values,
const std::string & metric_name)
{
saveJemallocMetricImpl<Value>(values,
return saveJemallocMetricImpl<Value>(values,
fmt::format("stats.{}", metric_name),
fmt::format("jemalloc.{}", metric_name));
}
template<typename Value>
static void saveAllArenasMetric(AsynchronousMetricValues & values,
static Value saveAllArenasMetric(AsynchronousMetricValues & values,
const std::string & metric_name)
{
saveJemallocMetricImpl<Value>(values,
return saveJemallocMetricImpl<Value>(values,
fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, metric_name),
fmt::format("jemalloc.arenas.all.{}", metric_name));
}
@ -657,10 +651,39 @@ void AsynchronousMetrics::update(TimePoint update_time)
}
}
#if defined(OS_LINUX) || defined(OS_FREEBSD)
MemoryStatisticsOS::Data memory_statistics_data = memory_stat.get();
#endif
#if USE_JEMALLOC
// 'epoch' is a special mallctl -- it updates the statistics. Without it, all
// the following calls will return stale values. It increments and returns
// the current epoch number, which might be useful to log as a sanity check.
auto epoch = updateJemallocEpoch();
new_values["jemalloc.epoch"] = epoch;
// Collect the statistics themselves.
saveJemallocMetric<size_t>(new_values, "allocated");
saveJemallocMetric<size_t>(new_values, "active");
saveJemallocMetric<size_t>(new_values, "metadata");
saveJemallocMetric<size_t>(new_values, "metadata_thp");
saveJemallocMetric<size_t>(new_values, "resident");
saveJemallocMetric<size_t>(new_values, "mapped");
saveJemallocMetric<size_t>(new_values, "retained");
saveJemallocMetric<size_t>(new_values, "background_thread.num_threads");
saveJemallocMetric<uint64_t>(new_values, "background_thread.num_runs");
saveJemallocMetric<uint64_t>(new_values, "background_thread.run_intervals");
saveAllArenasMetric<size_t>(new_values, "pactive");
[[maybe_unused]] size_t je_malloc_pdirty = saveAllArenasMetric<size_t>(new_values, "pdirty");
[[maybe_unused]] size_t je_malloc_pmuzzy = saveAllArenasMetric<size_t>(new_values, "pmuzzy");
saveAllArenasMetric<size_t>(new_values, "dirty_purged");
saveAllArenasMetric<size_t>(new_values, "muzzy_purged");
#endif
/// Process process memory usage according to OS
#if defined(OS_LINUX) || defined(OS_FREEBSD)
{
MemoryStatisticsOS::Data data = memory_stat.get();
MemoryStatisticsOS::Data & data = memory_statistics_data;
new_values["MemoryVirtual"] = data.virt;
new_values["MemoryResident"] = data.resident;
@ -676,9 +699,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
{
Int64 amount = total_memory_tracker.get();
Int64 peak = total_memory_tracker.getPeak();
Int64 new_amount = data.resident;
Int64 rss = data.resident;
Int64 free_memory_in_allocator_arenas = 0;
Int64 difference = new_amount - amount;
#if USE_JEMALLOC
/// This is memory which is kept by the allocator.
/// We will subtract it from RSS to decrease memory drift.
free_memory_in_allocator_arenas = je_malloc_pdirty * getPageSize();
#endif
Int64 difference = rss - free_memory_in_allocator_arenas - amount;
/// Log only if difference is high. This is for convenience. The threshold is arbitrary.
if (difference >= 1048576 || difference <= -1048576)
@ -686,11 +716,10 @@ void AsynchronousMetrics::update(TimePoint update_time)
"MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
ReadableSize(amount),
ReadableSize(peak),
ReadableSize(new_amount),
ReadableSize(rss),
ReadableSize(difference));
total_memory_tracker.set(new_amount);
CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount);
total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas);
}
}
#endif
@ -1561,31 +1590,6 @@ void AsynchronousMetrics::update(TimePoint update_time)
}
#endif
#if USE_JEMALLOC
// 'epoch' is a special mallctl -- it updates the statistics. Without it, all
// the following calls will return stale values. It increments and returns
// the current epoch number, which might be useful to log as a sanity check.
auto epoch = updateJemallocEpoch();
new_values["jemalloc.epoch"] = epoch;
// Collect the statistics themselves.
saveJemallocMetric<size_t>(new_values, "allocated");
saveJemallocMetric<size_t>(new_values, "active");
saveJemallocMetric<size_t>(new_values, "metadata");
saveJemallocMetric<size_t>(new_values, "metadata_thp");
saveJemallocMetric<size_t>(new_values, "resident");
saveJemallocMetric<size_t>(new_values, "mapped");
saveJemallocMetric<size_t>(new_values, "retained");
saveJemallocMetric<size_t>(new_values, "background_thread.num_threads");
saveJemallocMetric<uint64_t>(new_values, "background_thread.num_runs");
saveJemallocMetric<uint64_t>(new_values, "background_thread.run_intervals");
saveAllArenasMetric<size_t>(new_values, "pactive");
saveAllArenasMetric<size_t>(new_values, "pdirty");
saveAllArenasMetric<size_t>(new_values, "pmuzzy");
saveAllArenasMetric<size_t>(new_values, "dirty_purged");
saveAllArenasMetric<size_t>(new_values, "muzzy_purged");
#endif
updateHeavyMetricsIfNeeded(current_time, update_time, new_values);
/// Add more metrics as you wish.

View File

@ -372,6 +372,12 @@ CancellationCode QueryStatus::cancelQuery(bool)
void QueryStatus::addPipelineExecutor(PipelineExecutor * e)
{
/// In case of asynchronous distributed queries it is possible to call
/// addPipelineExecutor() from the cancelQuery() context, and this will
/// lead to deadlock.
if (is_killed.load())
throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED);
std::lock_guard lock(executors_mutex);
assert(std::find(executors.begin(), executors.end(), e) == executors.end());
executors.push_back(e);

View File

@ -164,7 +164,7 @@ def gen_versions(
# The order is important, PR number is used as cache during the build
versions = [str(pr_info.number), pr_commit_version]
result_version = pr_commit_version
if pr_info.number == 0:
if pr_info.number == 0 and pr_info.base_name == "master":
# First get the latest for cache
versions.insert(0, "latest")

View File

@ -99,6 +99,11 @@ class TestDockerImageCheck(unittest.TestCase):
def test_gen_version(self):
pr_info = PRInfo(PRInfo.default_event.copy())
pr_info.base_name = "anything-else"
versions, result_version = di.gen_versions(pr_info, None)
self.assertEqual(versions, ["0", "0-HEAD"])
self.assertEqual(result_version, "0-HEAD")
pr_info.base_name = "master"
versions, result_version = di.gen_versions(pr_info, None)
self.assertEqual(versions, ["latest", "0", "0-HEAD"])
self.assertEqual(result_version, "0-HEAD")

View File

@ -1808,6 +1808,9 @@ def main(args):
args, "system", "processes", "is_all_data_sent"
)
if args.s3_storage and (BuildFlags.THREAD in args.build_flags or BuildFlags.DEBUG in args.build_flags):
args.no_random_settings = True
if args.skip:
args.skip = set(args.skip)

View File

@ -39,9 +39,9 @@
8 0 8
9 0 9
0 3
3 9
2 21 def
1 12 abc
2 21 def
3 9
0 45
0 0

View File

@ -3,19 +3,19 @@ DROP TABLE IF EXISTS t2;
CREATE TABLE t2 (k UInt64, s String) ENGINE = Join(ANY, LEFT, k);
INSERT INTO t2 VALUES (1, 'abc'), (2, 'def');
SELECT k, s FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k;
SELECT k, s FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k ORDER BY k;
INSERT INTO t2 VALUES (6, 'ghi');
SELECT k, s FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k;
SELECT k, s FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k ORDER BY k;
SELECT k, js1.s, t2.s FROM (SELECT number AS k, number as s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k;
SELECT k, t2.k, js1.s, t2.s FROM (SELECT number AS k, number as s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k;
SELECT k, js1.s, t2.s FROM (SELECT number AS k, number as s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k ORDER BY k;
SELECT k, t2.k, js1.s, t2.s FROM (SELECT number AS k, number as s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k ORDER BY k;
SELECT k, js1.s, t2.s FROM (SELECT toUInt64(number / 3) AS k, sum(number) as s FROM numbers(10) GROUP BY toUInt64(number / 3) WITH TOTALS) js1 ANY LEFT JOIN t2 USING k;
SELECT k, js1.s, t2.s FROM (SELECT toUInt64(number / 3) AS k, sum(number) as s FROM numbers(10) GROUP BY toUInt64(number / 3) WITH TOTALS) js1 ANY LEFT JOIN t2 USING k ORDER BY k;
SELECT k, js1.s, t2.s FROM (SELECT number AS k, number AS s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 ON js1.k == t2.k;
SELECT k, t2.k, js1.s, t2.s FROM (SELECT number AS k, number AS s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 ON js1.k == t2.k;
SELECT k, js1.s, t2.s FROM (SELECT number AS k, number AS s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 ON js1.k == t2.k ORDER BY k;
SELECT k, t2.k, js1.s, t2.s FROM (SELECT number AS k, number AS s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 ON js1.k == t2.k ORDER BY k;
SELECT k, js1.s, t2.s FROM (SELECT number AS k, number AS s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 ON js1.k == t2.k OR js1.s == t2.k; -- { serverError 48 }
SELECT k, js1.s, t2.s FROM (SELECT number AS k, number AS s FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 ON js1.k == t2.k OR js1.s == t2.k ORDER BY k; -- { serverError 48 }
DROP TABLE t2;

View File

@ -194,7 +194,7 @@ def main():
select_requests = {
"select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in range(11)]),
"select count(*) from {tbl}": '12',
'select double, count(*) from {tbl} group by double': "7.7\t2\n9.9\t10"
'select double, count(*) from {tbl} group by double order by double': "7.7\t2\n9.9\t10"
}
t, httpd = start_server()

View File

@ -4,9 +4,9 @@
1 1
2 2
3 3
3 3
2 2
1 1
2 2
3 3
-
1 52.5 ONE
-

View File

@ -8,9 +8,9 @@ CREATE TABLE testJoinTable (number UInt64, data String) ENGINE = Join(ANY, INNER
INSERT INTO testJoinTable VALUES (1, '1'), (2, '2'), (3, '3');
SELECT * FROM (SELECT * FROM numbers(10)) js1 INNER JOIN testJoinTable USING number; -- { serverError 264 }
SELECT * FROM (SELECT * FROM numbers(10)) js1 INNER JOIN (SELECT * FROM testJoinTable) js2 USING number;
SELECT * FROM (SELECT * FROM numbers(10)) js1 ANY INNER JOIN testJoinTable USING number;
SELECT * FROM testJoinTable;
SELECT * FROM (SELECT * FROM numbers(10)) js1 INNER JOIN (SELECT * FROM testJoinTable) js2 USING number ORDER BY number;
SELECT * FROM (SELECT * FROM numbers(10)) js1 ANY INNER JOIN testJoinTable USING number ORDER BY number;
SELECT * FROM testJoinTable ORDER BY number;
DROP TABLE testJoinTable;
@ -25,7 +25,7 @@ CREATE TABLE master (id Int32, name String) ENGINE = Join (ANY, LEFT, id) SETTIN
INSERT INTO master VALUES (1, 'ONE');
INSERT INTO transaction VALUES (1, 52.5, 1);
SELECT tx.id, tx.value, m.name FROM transaction tx ANY LEFT JOIN master m ON m.id = tx.master_id;
SELECT tx.id, tx.value, m.name FROM transaction tx ANY LEFT JOIN master m ON m.id = tx.master_id ORDER BY tx.id;
DROP TABLE master;
DROP TABLE transaction;

View File

@ -52,110 +52,110 @@ uniqTheta
35 52331
36 53766
uniqTheta round(float)
0.125 1
0.5 1
0.05 1
0.143 1
0.056 1
0.048 2
0.083 1
0.25 1
0.1 1
0.028 1
0.027 1
0.028 1
0.031 1
0.067 1
0.037 1
0.045 162
0.125 163
0.5 162
0.05 162
0.143 162
0.091 81
0.056 162
0.048 162
0.083 163
0.25 162
1 162
0.1 163
0.028 162
0.048 2
0.05 1
0.056 1
0.067 1
0.083 1
0.1 1
0.125 1
0.143 1
0.25 1
0.5 1
0.027 162
0.028 162
0.031 162
0.067 162
0.043 162
0.037 162
0.043 162
0.045 162
0.048 162
0.05 162
0.056 162
0.067 162
0.071 162
0.083 163
0.091 81
0.1 163
0.125 163
0.143 162
0.25 162
0.5 162
1 162
0.027 53766
0.028 52331
0.031 54139
0.037 53716
0.043 54690
0.045 53054
0.125 53839
0.5 54020
0.05 53680
0.143 53947
0.091 26876
0.056 53331
0.048 54211
0.05 53680
0.056 53331
0.067 53516
0.071 53479
0.083 54985
0.091 26876
0.1 54408
0.125 53839
0.143 53947
0.25 53774
0.5 54020
1 55018
0.1 54408
0.028 52331
0.027 53766
0.031 54139
0.067 53516
0.043 54690
0.037 53716
0.071 53479
uniqTheta round(toFloat32())
0.5 1
0.05 1
0.25 1
0.048 2
0.083 1
0.125 1
0.031 1
0.143 1
0.028 1
0.067 1
0.027 1
0.056 1
0.028 1
0.031 1
0.037 1
0.048 2
0.05 1
0.056 1
0.067 1
0.083 1
0.1 1
0.5 162
0.05 162
0.25 162
0.048 162
0.091 81
0.125 1
0.143 1
0.25 1
0.5 1
0.027 162
0.028 162
0.031 162
0.037 162
0.043 162
0.045 162
0.048 162
0.05 162
0.056 162
0.067 162
0.071 162
0.083 163
0.125 163
0.031 162
0.143 162
0.028 162
0.067 162
0.045 162
0.027 162
0.056 162
0.037 162
0.091 81
0.1 163
0.125 163
0.143 162
0.25 162
0.5 162
1 162
0.5 54020
0.05 53680
0.25 53774
0.048 54211
0.091 26876
0.027 53766
0.028 52331
0.031 54139
0.037 53716
0.043 54690
0.045 53054
0.048 54211
0.05 53680
0.056 53331
0.067 53516
0.071 53479
0.083 54985
0.125 53839
0.031 54139
0.143 53947
0.028 52331
0.067 53516
0.045 53054
0.027 53766
0.056 53331
0.037 53716
0.091 26876
0.1 54408
0.125 53839
0.143 53947
0.25 53774
0.5 54020
1 55018
uniqTheta IPv4NumToString
1 1

View File

@ -2,27 +2,27 @@
SELECT 'uniqTheta';
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y ORDER BY Y;
SELECT 'uniqTheta round(float)';
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y ORDER BY Y;
SELECT 'uniqTheta round(toFloat32())';
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y ORDER BY Y;
SELECT 'uniqTheta IPv4NumToString';
SELECT Y, uniqTheta(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqTheta(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqTheta(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT Y, uniqTheta(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y ORDER BY Y;
SELECT Y, uniqTheta(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y ORDER BY Y;
SELECT 'uniqTheta remote()';

View File

@ -18,7 +18,8 @@ SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_co
ARRAY JOIN subcolumns
WHERE database = currentDatabase()
AND table = 't_json_sparse' AND column = 'data' AND active
GROUP BY subcolumns.names, subcolumns.serializations;
GROUP BY subcolumns.names, subcolumns.serializations
ORDER BY subcolumns.names;
SELECT '=============';
@ -29,7 +30,8 @@ SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_co
ARRAY JOIN subcolumns
WHERE database = currentDatabase()
AND table = 't_json_sparse' AND column = 'data' AND active
GROUP BY subcolumns.names, subcolumns.serializations;
GROUP BY subcolumns.names, subcolumns.serializations
ORDER BY subcolumns.names;
SELECT '=============';
@ -40,7 +42,8 @@ SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_co
ARRAY JOIN subcolumns
WHERE database = currentDatabase()
AND table = 't_json_sparse' AND column = 'data' AND active
GROUP BY subcolumns.names, subcolumns.serializations;
GROUP BY subcolumns.names, subcolumns.serializations
ORDER BY subcolumns.names;
INSERT INTO t_json_sparse SELECT '{"k1": 2}' FROM numbers(200000);
@ -52,8 +55,9 @@ SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_co
ARRAY JOIN subcolumns
WHERE database = currentDatabase()
AND table = 't_json_sparse' AND column = 'data' AND active
GROUP BY subcolumns.names, subcolumns.serializations;
GROUP BY subcolumns.names, subcolumns.serializations
ORDER BY subcolumns.names;
SELECT data.k1, count(), sum(data.k2.k3) FROM t_json_sparse GROUP BY data.k1;
SELECT data.k1, count(), sum(data.k2.k3) FROM t_json_sparse GROUP BY data.k1 ORDER BY data.k1;
DROP TABLE t_json_sparse;
-- DROP TABLE t_json_sparse;

View File

@ -16,13 +16,13 @@ select x3, x2, x1 from test order by x3 desc;
10 1 10
1 100 100
insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
select x3, x2 from test group by x3, x2;
10 1
select x3, x2 from test group by x3, x2 order by x3;
1 100
10 1
100 10
select x3, x2 from test group by 1, 2;
10 1
select x3, x2 from test group by 1, 2 order by x3;
1 100
10 1
100 10
select x1, x2, x3 from test order by x3 limit 1 by x3;
100 100 1
@ -102,14 +102,14 @@ select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2,
1 2 10 100
10 20 1 10
100 200 100 1
select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6;
select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a;
44 88 13 14 15 16
explain syntax select plus(1, 1) as a group by a;
SELECT 1 + 1 AS a
GROUP BY a
select substr('aaaaaaaaaaaaaa', 8) as a group by a;
select substr('aaaaaaaaaaaaaa', 8) as a group by a order by a;
aaaaaaa
select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8);
select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8) order by a;
aaaaaaa
select b from (select 5 as a, 'Hello' as b order by a);
Hello

View File

@ -15,8 +15,8 @@ select x3, x2, x1 from test order by 1 desc;
select x3, x2, x1 from test order by x3 desc;
insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
select x3, x2 from test group by x3, x2;
select x3, x2 from test group by 1, 2;
select x3, x2 from test group by x3, x2 order by x3;
select x3, x2 from test group by 1, 2 order by x3;
select x1, x2, x3 from test order by x3 limit 1 by x3;
select x1, x2, x3 from test order by 3 limit 1 by 3;
@ -39,11 +39,11 @@ create table test2(x1 Int, x2 Int, x3 Int) engine=Memory;
insert into test2 values (1, 10, 100), (10, 1, 10), (100, 100, 1);
select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 4 desc, 3 asc;
select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6;
select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a;
explain syntax select plus(1, 1) as a group by a;
select substr('aaaaaaaaaaaaaa', 8) as a group by a;
select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8);
select substr('aaaaaaaaaaaaaa', 8) as a group by a order by a;
select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8) order by a;
select b from (select 5 as a, 'Hello' as b order by a);
select b from (select 5 as a, 'Hello' as b group by a);

View File

@ -1,14 +1,14 @@
-- { echoOn }
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg ORDER BY a SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg ORDER BY a SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS ORDER BY a SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS ORDER BY a SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}
-- regression for incorrect positions passed to finalizeChunk()

View File

@ -10,10 +10,10 @@ create table data_02295 (
insert into data_02295 select 0 b, intDiv(number, 2) a, groupArrayArrayState([toUInt64(number)]) from numbers(4) group by a, b;
-- { echoOn }
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg ORDER BY a SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg ORDER BY a SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS ORDER BY a SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow;
SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS ORDER BY a SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow;
-- regression for incorrect positions passed to finalizeChunk()
SELECT a, min(b), max(b) FROM data_02295 GROUP BY a ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1;
SELECT a, min(b), max(b) FROM data_02295 GROUP BY a ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1, max_threads = 1;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Tags: no-fasttest, long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -16,9 +16,10 @@ function check_refcnt_for_table()
query_id="$table-$(random_str 10)"
# Notes:
# - query may sleep 0.1*(200/4)=5 seconds, it is enough to check system.parts
# - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts
# - "part = 1" condition should prune all parts except first
$CLICKHOUSE_CLIENT --format Null --max_block_size 1 --query_id "$query_id" -q "select sleepEachRow(0.1) from $table where part = 1" &
# - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier
$CLICKHOUSE_CLIENT --format Null --max_threads 1 --max_block_size 1 --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" &
PID=$!
# wait for query to be started
@ -30,26 +31,30 @@ function check_refcnt_for_table()
# however when it starts reading, partition pruning takes place,
# and it should hold only parts that are required for SELECT
#
# So 2 seconds delay to ensure that it goes the reading stage.
sleep 2
# But to reach partition prune the function sleepEachRow() will be executed twice,
# so 2 seconds for sleepEachRow() and 3 seconds just to ensure that it enters the reading stage.
sleep $((2+3))
# NOTE: parts that are used in query will have refcount increased for each range
$CLICKHOUSE_CLIENT -q "select table, name, refcount from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount > 1"
# Kill the query gracefully.
kill -INT $PID
wait $PID
}
# NOTE: index_granularity=1 to cancel ASAP
$CLICKHOUSE_CLIENT -nmq "
drop table if exists data_02340;
create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key;
"
create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key settings index_granularity=1;
" || exit 1
check_refcnt_for_table data_02340
$CLICKHOUSE_CLIENT -nmq "
drop table if exists data_02340_rep;
create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key;
"
create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key settings index_granularity=1;
" || exit 1
check_refcnt_for_table data_02340_rep
exit 0