From a54773f94739fa48934f446c8dd34f2832819ffd Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Mon, 18 May 2020 22:33:12 +0300
Subject: [PATCH 1/3] Add more logs to debug trace_log overflow

---
 docker/test/performance-comparison/compare.sh |  2 +-
 src/Interpreters/SystemLog.h                  | 65 ++++++++++++++-----
 2 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 05c055107ac..023c931de1c 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -542,7 +542,7 @@ case "$stage" in
     # to collect the logs. Prefer not to restart, because addresses might change
     # and we won't be able to process trace_log data. Start in a subshell, so that
     # it doesn't interfere with the watchdog through `wait`.
-    ( time get_profiles || restart || get_profiles ||: )
+    ( time get_profiles || restart || get_profiles ||: ) 2>> profile-errors.log
 
     # Kill the whole process group, because somehow when the subshell is killed,
     # the sleep inside remains alive and orphaned.
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index f5a024c7768..10dcb84c1ce 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -165,9 +165,9 @@ private:
     // Queue is bounded. But its size is quite large to not block in all normal cases.
     std::vector<LogElement> queue;
     // An always-incrementing index of the first message currently in the queue.
-    // We use it to give a global sequential index to every message, so that we can wait
-    // until a particular message is flushed. This is used to implement synchronous log
-    // flushing for SYSTEM FLUSH LOGS.
+    // We use it to give a global sequential index to every message, so that we
+    // can wait until a particular message is flushed. This is used to implement
+    // synchronous log flushing for SYSTEM FLUSH LOGS.
     uint64_t queue_front_index = 0;
     bool is_shutdown = false;
     std::condition_variable flush_event;
@@ -175,6 +175,8 @@ private:
     uint64_t requested_flush_before = 0;
     // Flushed log up to this index, exclusive
     uint64_t flushed_before = 0;
+    // Logged overflow message at this queue front index
+    uint64_t logged_queue_full_at_index = -1;
 
     void savingThreadFunction();
 
@@ -244,9 +246,22 @@ void SystemLog<LogElement>::add(const LogElement & element)
 
     if (queue.size() >= DBMS_SYSTEM_LOG_QUEUE_SIZE)
     {
-        // TextLog sets its logger level to 0, so this log is a noop and there
-        // is no recursive logging.
-        LOG_ERROR(log, "Queue is full for system log '" + demangle(typeid(*this).name()) + "'.");
+        // Ignore all further entries until the queue is flushed.
+        // Log a message about that. Don't spam it -- this might be especially
+        // problematic in case of trace log. Remember what the front index of the
+        // queue was when we last logged the message. If it changed, it means the
+        // queue was flushed, and we can log again.
+        if (queue_front_index != logged_queue_full_at_index)
+        {
+            logged_queue_full_at_index = queue_front_index;
+
+            // TextLog sets its logger level to 0, so this log is a noop and
+            // there is no recursive logging.
+            LOG_ERROR(log, "Queue is full for system log '"
+                << demangle(typeid(*this).name()) << "'"
+                << " at " << queue_front_index);
+        }
+
         return;
     }
 
@@ -325,9 +340,15 @@ void SystemLog<LogElement>::savingThreadFunction()
             uint64_t to_flush_end = 0;
 
             {
+                LOG_TRACE(log, "Sleeping");
                 std::unique_lock lock(mutex);
-                flush_event.wait_for(lock, std::chrono::milliseconds(flush_interval_milliseconds),
-                    [&] () { return requested_flush_before > flushed_before || is_shutdown; });
+                const bool predicate = flush_event.wait_for(lock,
+                    std::chrono::milliseconds(flush_interval_milliseconds),
+                    [&] () {
+                        return requested_flush_before > flushed_before
+                            || is_shutdown;
+                    }
+                );
 
                 queue_front_index += queue.size();
                 to_flush_end = queue_front_index;
@@ -337,6 +358,13 @@ void SystemLog<LogElement>::savingThreadFunction()
                 queue.swap(to_flush);
 
                 exit_this_thread = is_shutdown;
+
+                LOG_TRACE(log, "Woke up"
+                    << (predicate ? " by condition" : " by timeout ("
+                            + toString(flush_interval_milliseconds) + " ms)")
+                    << ", " << to_flush.size() << " elements to flush"
+                    << " up to " << to_flush_end
+                    << (is_shutdown ? ", shutdown requested" : ""));
             }
 
             if (to_flush.empty())
@@ -351,6 +379,7 @@ void SystemLog<LogElement>::savingThreadFunction()
             tryLogCurrentException(__PRETTY_FUNCTION__);
         }
     }
+    LOG_TRACE(log, "Terminating");
 }
 
 
@@ -359,11 +388,13 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
 {
     try
     {
-        LOG_TRACE(log, "Flushing system log");
+        LOG_TRACE(log, "Flushing system log, "
+            << to_flush.size() << " entries to flush");
 
-        /// We check for existence of the table and create it as needed at every flush.
-        /// This is done to allow user to drop the table at any moment (new empty table will be created automatically).
-        /// BTW, flush method is called from single thread.
+        /// We check for existence of the table and create it as needed at every
+        /// flush. This is done to allow user to drop the table at any moment
+        /// (new empty table will be created automatically). BTW, flush method
+        /// is called from single thread.
         prepareTable();
 
         Block block = LogElement::createBlock();
@@ -389,9 +420,13 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
         tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 
-    std::unique_lock lock(mutex);
-    flushed_before = to_flush_end;
-    flush_event.notify_all();
+    {
+        std::unique_lock lock(mutex);
+        flushed_before = to_flush_end;
+        flush_event.notify_all();
+    }
+
+    LOG_TRACE(log, "Flushed system log");
 }
 
 

From a700b527ce43ff7b772935b9e6781e084a566ad2 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 19 May 2020 17:51:51 +0300
Subject: [PATCH 2/3] fixup

---
 docker/test/performance-comparison/compare.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 023c931de1c..1833c9a7a9e 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -27,7 +27,7 @@ function configure
     kill -0 $left_pid
     disown $left_pid
     set +m
-    while ! clickhouse-client --port 9001 --query "select 1" ; do kill -0 $left_pid ; echo . ; sleep 1 ; done
+    while ! clickhouse-client --port 9001 --query "select 1" && kill -0 $left_pid ; do echo . ; sleep 1 ; done
     echo server for setup started
 
     clickhouse-client --port 9001 --query "create database test" ||:
@@ -71,9 +71,9 @@ function restart
 
     set +m
 
-    while ! clickhouse-client --port 9001 --query "select 1" ; do kill -0 $left_pid ; echo . ; sleep 1 ; done
+    while ! clickhouse-client --port 9001 --query "select 1" && kill -0 $left_pid ; do echo . ; sleep 1 ; done
     echo left ok
-    while ! clickhouse-client --port 9002 --query "select 1" ; do kill -0 $right_pid ; echo . ; sleep 1 ; done
+    while ! clickhouse-client --port 9002 --query "select 1" && kill -0 $right_pid ; do echo . ; sleep 1 ; done
     echo right ok
 
     clickhouse-client --port 9001 --query "select * from system.tables where database != 'system'"
@@ -263,7 +263,7 @@ done
 wait
 unset IFS
 
-parallel --verbose --null < analyze-commands.txt
+parallel --null < analyze-commands.txt
 }
 
 # Analyze results
@@ -542,7 +542,7 @@ case "$stage" in
     # to collect the logs. Prefer not to restart, because addresses might change
     # and we won't be able to process trace_log data. Start in a subshell, so that
     # it doesn't interfere with the watchdog through `wait`.
-    ( time get_profiles || restart || get_profiles ||: ) 2>> profile-errors.log
+    ( get_profiles || restart || get_profiles ||: )
 
     # Kill the whole process group, because somehow when the subshell is killed,
     # the sleep inside remains alive and orphaned.

From 838b3e95332b83ab7cd28962e974543c708608ff Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 19 May 2020 17:55:27 +0300
Subject: [PATCH 3/3] fixup

---
 src/Interpreters/SystemLog.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 10dcb84c1ce..b2a4eec7883 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -344,7 +344,8 @@ void SystemLog<LogElement>::savingThreadFunction()
                 std::unique_lock lock(mutex);
                 const bool predicate = flush_event.wait_for(lock,
                     std::chrono::milliseconds(flush_interval_milliseconds),
-                    [&] () {
+                    [&] ()
+                    {
                         return requested_flush_before > flushed_before
                             || is_shutdown;
                     }