Removed non-breaking spaces: find dbms -name '*.h' -or -name '*.cpp' | xargs grep -l $'\xc2\xa0' | xargs sed -i -r -e 's/\xc2\xa0/ /g'

2024-11-24 16:42:05 +00:00 · 2019-01-25 02:01:56 +03:00 · 2019-01-25 02:01:56 +03:00 · ef50601b5c
commit ef50601b5c
parent 5ccc42fd96
4 changed files with 53 additions and 53 deletions
--- a/dbms/src/AggregateFunctions/UniqCombinedBiasData.h
+++ b/dbms/src/AggregateFunctions/UniqCombinedBiasData.h
@@ -6,25 +6,25 @@ namespace DB
 {

 /** Data for HyperLogLogBiasEstimator in the uniqCombined function.
-  * The development plan is as follows:
-  * 1. Assemble ClickHouse.
-  * 2. Run the script src/dbms/scripts/gen-bias-data.py, which returns one array for getRawEstimates()
-  *     and another array for getBiases().
-  * 3. Update `raw_estimates` and `biases` arrays. Also update the size of arrays in InterpolatedData.
-  * 4. Assemble ClickHouse.
-  * 5. Run the script src/dbms/scripts/linear-counting-threshold.py, which creates 3 files:
-  * - raw_graph.txt (1st column: the present number of unique values;
-  *    2nd column: relative error in the case of HyperLogLog without applying any corrections)
-  * - linear_counting_graph.txt (1st column: the present number of unique values;
-  *    2nd column: relative error in the case of HyperLogLog using LinearCounting)
-  * - bias_corrected_graph.txt (1st column: the present number of unique values;
-  *    2nd column: relative error in the case of HyperLogLog with the use of corrections from the algorithm HyperLogLog++)
-  * 6. Generate a graph with gnuplot based on this data.
-  * 7. Determine the minimum number of unique values at which it is better to correct the error
-  *     using its evaluation (ie, using the HyperLogLog++ algorithm) than applying the LinearCounting algorithm.
-  * 7. Accordingly, update the constant in the function getThreshold()
-  * 8. Assemble ClickHouse.
-  */
+  * The development plan is as follows:
+  * 1. Assemble ClickHouse.
+  * 2. Run the script src/dbms/scripts/gen-bias-data.py, which returns one array for getRawEstimates()
+  *     and another array for getBiases().
+  * 3. Update `raw_estimates` and `biases` arrays. Also update the size of arrays in InterpolatedData.
+  * 4. Assemble ClickHouse.
+  * 5. Run the script src/dbms/scripts/linear-counting-threshold.py, which creates 3 files:
+  * - raw_graph.txt (1st column: the present number of unique values;
+  *    2nd column: relative error in the case of HyperLogLog without applying any corrections)
+  * - linear_counting_graph.txt (1st column: the present number of unique values;
+  *    2nd column: relative error in the case of HyperLogLog using LinearCounting)
+  * - bias_corrected_graph.txt (1st column: the present number of unique values;
+  *    2nd column: relative error in the case of HyperLogLog with the use of corrections from the algorithm HyperLogLog++)
+  * 6. Generate a graph with gnuplot based on this data.
+  * 7. Determine the minimum number of unique values at which it is better to correct the error
+  *     using its evaluation (i.e., using the HyperLogLog++ algorithm) than applying the LinearCounting algorithm.
+  * 8. Accordingly, update the constant in the function getThreshold()
+  * 9. Assemble ClickHouse.
+  */
 struct UniqCombinedBiasData
 {
    using InterpolatedData = std::array<double, 200>;
--- a/dbms/src/AggregateFunctions/UniquesHashSet.h
+++ b/dbms/src/AggregateFunctions/UniquesHashSet.h
@@ -15,33 +15,33 @@


 /** Approximate calculation of anything, as usual, is constructed according to the following scheme:
-  * - some data structure is used to calculate the value of X;
-  * - Not all values are added to the data structure, but only selected ones (according to some selectivity criteria);
-  * - after processing all elements, the data structure is in some state S;
-  * - as an approximate value of X, the value calculated according to the maximum likelihood principle is returned:
-  *   at what real value X, the probability of finding the data structure in the obtained state S is maximal.
-  */
+  * - some data structure is used to calculate the value of X;
+  * - Not all values are added to the data structure, but only selected ones (according to some selectivity criteria);
+  * - after processing all elements, the data structure is in some state S;
+  * - as an approximate value of X, the value calculated according to the maximum likelihood principle is returned:
+  *   at what real value X, the probability of finding the data structure in the obtained state S is maximal.
+  */

 /** In particular, what is described below can be found by the name of the BJKST algorithm.
-  */
+  */

 /** Very simple hash-set for approximate number of unique values.
-  * Works like this:
-  * - you can insert UInt64;
-  * - before insertion, first the hash function UInt64 -> UInt32 is calculated;
-  * - the original value is not saved (lost);
-  * - further all operations are made with these hashes;
-  * - hash table is constructed according to the scheme:
-  * -  open addressing (one buffer, position in buffer is calculated by taking remainder of division by its size);
-  * -  linear probing (if the cell already has a value, then the cell following it is taken, etc.);
-  * -  the missing value is zero-encoded; to remember presence of zero in set, separate variable of type bool is used;
-  * -  buffer growth by 2 times when filling more than 50%;
-  * - if the set has more UNIQUES_HASH_MAX_SIZE elements, then all the elements are removed from the set,
-  *   not divisible by 2, and then all elements that do not divide by 2 are not inserted into the set;
-  * - if the situation repeats, then only elements dividing by 4, etc., are taken.
-  * - the size() method returns an approximate number of elements that have been inserted into the set;
-  * - there are methods for quick reading and writing in binary and text form.
-  */
+  * Works like this:
+  * - you can insert UInt64;
+  * - before insertion, first the hash function UInt64 -> UInt32 is calculated;
+  * - the original value is not saved (lost);
+  * - further all operations are made with these hashes;
+  * - hash table is constructed according to the scheme:
+  * -  open addressing (one buffer, position in buffer is calculated by taking remainder of division by its size);
+  * -  linear probing (if the cell already has a value, then the cell following it is taken, etc.);
+  * -  the missing value is zero-encoded; to remember presence of zero in set, separate variable of type bool is used;
+  * -  buffer growth by 2 times when filling more than 50%;
+  * - if the set has more than UNIQUES_HASH_MAX_SIZE elements, then all elements not divisible by 2 are removed from the set,
+  *   and from then on elements not divisible by 2 are not inserted into the set;
+  * - if the situation repeats, then only elements divisible by 4, etc., are taken.
+  * - the size() method returns an approximate number of elements that have been inserted into the set;
+  * - there are methods for quick reading and writing in binary and text form.
+  */

 /// The maximum degree of buffer size before the values are discarded
 #define UNIQUES_HASH_MAX_SIZE_DEGREE 17
@@ -50,8 +50,8 @@
 #define UNIQUES_HASH_MAX_SIZE (1ULL << (UNIQUES_HASH_MAX_SIZE_DEGREE - 1))

 /** The number of least significant bits used for thinning. The remaining high-order bits are used to determine the position in the hash table.
-  * (high-order bits are taken because the younger bits will be constant after dropping some of the values)
-  */
+  * (high-order bits are taken because the low-order bits will be constant after dropping some of the values)
+  */
 #define UNIQUES_HASH_BITS_FOR_SKIP (32 - UNIQUES_HASH_MAX_SIZE_DEGREE)

 /// Initial buffer size degree
@@ -59,8 +59,8 @@


 /** This hash function is not the most optimal, but UniquesHashSet states counted with it,
-  * stored in many places on disks (in the Yandex.Metrika), so it continues to be used.
-  */
+  * stored in many places on disks (in the Yandex.Metrika), so it continues to be used.
+  */
 struct UniquesHashSetDefaultHash
 {
    size_t operator() (UInt64 x) const
--- a/dbms/src/Client/tests/test_connect.cpp
+++ b/dbms/src/Client/tests/test_connect.cpp
@@ -9,9 +9,9 @@


 /** In a loop it connects to the server and immediately breaks the connection.
-  * Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
-  * This behavior causes a bug in the TCPServer implementation in the Poco library.
-  */
+  * Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
+  * This behavior causes a bug in the TCPServer implementation in the Poco library.
+  */
 int main(int argc, char ** argv)
 try
 {
--- a/dbms/src/Common/InterruptListener.h
+++ b/dbms/src/Common/InterruptListener.h
@@ -45,10 +45,10 @@ static int sigtimedwait(const sigset_t *set, siginfo_t *info, const struct times


 /** As long as there exists an object of this class - it blocks the INT signal, at the same time it lets you know if it came.
-  * This is necessary so that you can interrupt the execution of the request with Ctrl+C.
-  * Use only one instance of this class at a time.
-  * If `check` method returns true (the signal has arrived), the next call will wait for the next signal.
-  */
+  * This is necessary so that you can interrupt the execution of the request with Ctrl+C.
+  * Use only one instance of this class at a time.
+  * If `check` method returns true (the signal has arrived), the next call will wait for the next signal.
+  */
 class InterruptListener
 {
 private: