From 846de8579916f54bc433518ca42d48c14971d98b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 16 Jan 2018 05:04:02 +0300
Subject: [PATCH] Continued experiments [#CLICKHOUSE-2]

---
 dbms/src/IO/LZ4_decompress_faster.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/dbms/src/IO/LZ4_decompress_faster.h b/dbms/src/IO/LZ4_decompress_faster.h
index a15f2662aa3..ecb7003c493 100644
--- a/dbms/src/IO/LZ4_decompress_faster.h
+++ b/dbms/src/IO/LZ4_decompress_faster.h
@@ -9,15 +9,15 @@ namespace LZ4
 {
 
 /** There are many implementation details of LZ4 decompression loop, that affect performance.
-  * For example: copy by 8 or by 16 bytes at once; use shuffle instruction to replicate match or not.
+  * For example: copy by 8 or by 16 (SSE2) bytes at once; use shuffle (SSSE3) instruction to replicate match or not.
   *
-  * The optimal algorithm is dependent:
+  * The optimal algorithm is dependent on:
   *
-  * - on CPU architecture
+  * 1. CPU architecture.
   *    (example: on Skylake it's almost always better to copy by 16 bytes and use shuffle,
   *     but on Westmere using shuffle is worse and copy by 16 bytes is better only for high compression ratios)
   *
-  * - on data distribution
+  * 2. Data distribution.
   *    (example: when compression ratio is higher than 10.20,
   *     it's usually better to copy by 16 bytes rather than 8).
   *
@@ -49,22 +49,22 @@ struct PerformanceStatistics
     double count = 0;
     double sum = 0;
 
-    double adjusted_count() const
+    double adjustedCount() const
     {
         return count - NUM_INVOCATIONS_TO_THROW_OFF;
     }
 
     double mean() const
     {
-        return sum / adjusted_count();
+        return sum / adjustedCount();
     }
 
     /// For better convergence, we don't use proper estimate of stddev.
-    /// We want to eventually choose between two algorithms even in case
+    /// We want to eventually separate between two algorithms even in case
     /// when there is no statistical significant difference between them.
     double sigma() const
     {
-        return mean() / sqrt(adjusted_count());
+        return mean() / sqrt(adjustedCount());
     }
 
     void update(double seconds, double bytes)
@@ -80,8 +80,8 @@ struct PerformanceStatistics
 
         /// If there is a variant with not enough statistics, always choose it.
         /// And in that case prefer variant with less number of invocations.
-        if (adjusted_count() < 2)
-            return adjusted_count() - 1;
+        if (adjustedCount() < 2)
+            return adjustedCount() - 1;
         else
             return std::normal_distribution<>(mean(), sigma())(rng);
     }
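
The PerformanceStatistics struct touched above chooses between several decompression variants with a Thompson-sampling-style scheme: each variant keeps running timing statistics, a score is drawn for each from a normal distribution around its observed speed, and the variant with the largest score runs next. Below is a minimal, self-contained sketch of that idea; the names VariantStats and selectVariant are illustrative, and std::mt19937_64 stands in for the pcg64 generator the real code uses. It is a sketch of the technique, not the implementation in LZ4_decompress_faster.h.

    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <vector>

    /// Per-variant timing statistics (sketch of the idea behind PerformanceStatistics).
    struct VariantStats
    {
        /// The first few invocations are warm-up and are excluded from the estimate.
        static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2;

        double count = 0;   /// number of invocations so far
        double sum = 0;     /// sum of observed speeds (bytes per second)

        double adjustedCount() const { return count - NUM_INVOCATIONS_TO_THROW_OFF; }
        double mean() const { return sum / adjustedCount(); }

        /// Deliberately not a real stddev: it shrinks as 1 / sqrt(n), so two variants
        /// eventually stop overlapping even when their true speeds are very close.
        double sigma() const { return mean() / std::sqrt(adjustedCount()); }

        /// Score used for selection. Variants without enough data return a negative
        /// value, so the least-invoked variant is forced to run first.
        double sample(std::mt19937_64 & rng) const
        {
            if (adjustedCount() < 2)
                return adjustedCount() - 1;
            return std::normal_distribution<>(mean(), sigma())(rng);
        }

        void update(double seconds, double bytes)
        {
            ++count;
            if (adjustedCount() > 0)    /// skip the warm-up invocations
                sum += bytes / seconds;
        }
    };

    /// Pick the variant with the largest sampled score (variants must be non-empty).
    std::size_t selectVariant(const std::vector<VariantStats> & variants, std::mt19937_64 & rng)
    {
        std::size_t best = 0;
        double best_score = variants[0].sample(rng);
        for (std::size_t i = 1; i < variants.size(); ++i)
        {
            double score = variants[i].sample(rng);
            if (score > best_score)
            {
                best_score = score;
                best = i;
            }
        }
        return best;
    }

Usage in a decompression loop would be: call selectVariant(), time the chosen variant, then feed the elapsed seconds and decompressed bytes back through update(). Because sigma() is mean() / sqrt(n) rather than a proper standard deviation estimate, the drawn scores concentrate as n grows; that is what the reworded comment means by eventually separating two algorithms even when the difference between them is not statistically significant.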
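
On the "copy by 8 or by 16 (SSE2) bytes; use shuffle (SSSE3) instruction to replicate match" remark: when a match offset is smaller than the 16-byte copy width, a single 16-byte load from the match would pick up bytes that have not been produced yet, so the result would not repeat the pattern correctly. The shuffle trick loads the short pattern once and uses PSHUFB to repeat it across a full 16-byte register. The sketch below shows the general technique under the assumption that SSSE3 is available; the OverlapMasks table and the copyOverlap16Shuffle name are illustrative, not copied from LZ4_decompress_faster.cpp.

    #include <cstdint>
    #include <cstddef>
    #include <tmmintrin.h>   // SSSE3: _mm_shuffle_epi8

    /// Shuffle masks: bytes[offset][i] = i % offset, i.e. which of the first `offset`
    /// pattern bytes should land at byte position i of the result. A real implementation
    /// would keep this as a compile-time table; building it in a constructor keeps the
    /// sketch short.
    struct OverlapMasks
    {
        uint8_t bytes[16][16] = {};

        OverlapMasks()
        {
            for (std::size_t offset = 1; offset < 16; ++offset)
                for (std::size_t i = 0; i < 16; ++i)
                    bytes[offset][i] = static_cast<uint8_t>(i % offset);
        }
    };

    static const OverlapMasks overlap_masks;

    /// Fill 16 bytes at `op` by repeating the pattern of length `offset` that starts at
    /// `match`. Requires 0 < offset < 16. The 16-byte load reads past the pattern into
    /// not-yet-written output, but those bytes are ignored because every shuffle index
    /// is less than `offset`.
    static inline void copyOverlap16Shuffle(uint8_t * op, const uint8_t * match, std::size_t offset)
    {
        __m128i pattern = _mm_loadu_si128(reinterpret_cast<const __m128i *>(match));
        __m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(overlap_masks.bytes[offset]));
        _mm_storeu_si128(reinterpret_cast<__m128i *>(op), _mm_shuffle_epi8(pattern, mask));
    }

A full decompressor also has to advance the match pointer by a corrected amount after the widened copy, which the sketch omits. On CPUs where the shuffle is comparatively slow, the Westmere case mentioned in the comment, a pair of plain 8-byte copies plus a pointer adjustment can win, and that is exactly the kind of trade-off the runtime statistics above are meant to resolve.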