Fixed translation errors, part 1 [#CLICKHOUSE-3].
Commit 8e5f92f025, parent 46db454562.
Repository: https://github.com/ClickHouse/ClickHouse.git
@@ -22,12 +22,12 @@ namespace ErrorCodes
 }
 
 
-/** Many modern allocators (for example, tcmalloc) do not know how to do a mremap for realloc,
+/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
  * even in case of large enough chunks of memory.
  * Although this allows you to increase performance and reduce memory consumption during realloc.
- * To fix this, do the mremap yourself if the chunk of memory is large enough.
+ * To fix this, we do mremap manually if the chunk of memory is large enough.
  * The threshold (64 MB) is chosen quite large, since changing the address space is
- * rather slow, especially in the case of a large number of threads.
+ * very slow, especially in the case of a large number of threads.
  * We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
  *
  * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
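For illustration, a minimal sketch of the manual remapping that the comment above describes. This is not the actual Allocator code: the function name is made up, it assumes the buffer was obtained from mmap because both sizes are above the 64 MB threshold, and mremap is Linux-specific.

    #include <sys/mman.h>
    #include <cstddef>

    /// Grow a large, mmap-allocated buffer by remapping its pages instead of copying them.
    void * reallocMmapped(void * buf, size_t old_size, size_t new_size)
    {
        void * res = mremap(buf, old_size, new_size, MREMAP_MAYMOVE);
        return res == MAP_FAILED ? nullptr : res;
    }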
@@ -13,7 +13,7 @@ namespace DB
  * For this, the requested size is rounded up to the power of two
  * (or up to 8, if less, or using memory allocation outside Arena if the size is greater than 65536).
  * When freeing memory, for each size (14 options in all: 8, 16 ... 65536),
- * a one-link list of free blocks is kept track.
+ * a single-linked list of free blocks is kept track.
  * When allocating, we take the head of the list of free blocks,
  * or, if the list is empty - allocate a new block using Arena.
  */
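A hypothetical sketch of the size-class bookkeeping this comment describes (rounding and free-list indexing only; names are mine and the real ArenaWithFreeLists differs in details):

    #include <cstddef>

    static constexpr size_t min_block_size = 8;
    static constexpr size_t max_block_size = 65536;   /// 14 size classes: 8, 16, ..., 65536

    /// Round the requested size up to a power of two, but not below 8.
    size_t roundUpToSizeClass(size_t size)
    {
        size_t rounded = min_block_size;
        while (rounded < size)
            rounded <<= 1;
        return rounded;
    }

    /// Index of the free list for a rounded size: 8 -> 0, 16 -> 1, ..., 65536 -> 13.
    size_t freeListIndex(size_t rounded_size)
    {
        return 63 - __builtin_clzll(rounded_size) - 3;
    }

    /// A freed block stores the pointer to the next free block in its own first 8 bytes,
    /// which is why the minimum block size is 8.
    union FreeBlock { FreeBlock * next; char data[min_block_size]; };
    FreeBlock * free_lists[14] {};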
@@ -26,7 +26,7 @@ namespace DB
  * Designed for situations in which many arrays of the same small size are created,
  * but the size is not known at compile time.
  * Also gives a significant advantage in cases where it is important that `sizeof` is minimal.
  * For example, if arrays are put in an open-addressing hash table with inplace storage of values (like HashMap)
  *
  * In this case, compared to std::vector:
  * - for arrays of 1 element size - an advantage of about 2 times;
@@ -82,7 +82,7 @@ public:
         init(size_, dont_init_elems);
     }
 
-    /** Preposition.
+    /** Move operations.
       */
     AutoArray(AutoArray && src)
     {
@@ -18,7 +18,7 @@ namespace ErrorCodes
 /** Compact array for data storage, size `content_width`, in bits, of which is
  * less than one byte. Instead of storing each value in a separate
  * bytes, which leads to a waste of 37.5% of the space for content_width = 5, CompactArray stores
  * adjacent `content_width`-bit values in the byte array, that is actually CompactArray
  * simulates an array of `content_width`-bit values.
  */
 template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
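For illustration, a rough sketch of the bit packing this comment describes: reading the i-th content_width-bit value out of a packed byte array. This is a simplified stand-in, not the actual CompactArray code; it assumes content_width <= 8 and a buffer padded with one extra byte.

    #include <cstdint>
    #include <cstddef>

    uint8_t readPacked(const uint8_t * data, size_t i, uint8_t content_width)
    {
        size_t bit_offset = i * content_width;
        size_t byte_index = bit_offset / 8;
        size_t shift = bit_offset % 8;
        /// Read two adjacent bytes so that a value crossing a byte boundary is handled.
        uint16_t window = uint16_t(data[byte_index]) | uint16_t(uint16_t(data[byte_index + 1]) << 8);
        return uint8_t((window >> shift) & ((1u << content_width) - 1));
    }

With content_width = 5, packing like this uses 5 bits per value instead of a whole byte, which is the 37.5% saving mentioned above.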
@@ -74,7 +74,7 @@ void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno);
 
 
 /** Try to write an exception to the log (and forget about it).
-  * Can be used in destructors in the catch block (...).
+  * Can be used in destructors in the catch-all block.
   */
 void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
 void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");
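The intended usage pattern is a catch-all in code that must not throw, such as a destructor; a hedged sketch (the holder class here is made up for the example, and only the first overload declared above is used):

    #include <string>

    void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");

    struct TemporaryFileHolder   /// hypothetical class, for illustration only
    {
        ~TemporaryFileHolder()
        {
            try
            {
                /// ... cleanup work that may throw ...
            }
            catch (...)
            {
                /// Destructors must not propagate exceptions, so log and forget.
                tryLogCurrentException("TemporaryFileHolder");
            }
        }
    };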
@@ -7,7 +7,7 @@
 /** A hash table that allows you to clear the table in O(1).
  * Even simpler than HashSet: Key and Mapped must be POD-types.
  *
- * Instead of this class, you could just use the couple <version, key> in the HashSet as the key
+ * Instead of this class, you could just use the pair (version, key) in the HashSet as the key
  * but then the table would accumulate all the keys that it ever stored, and it was unreasonably growing.
  * This class goes a step further and considers the keys with the old version empty in the hash table.
  */
@@ -4,10 +4,17 @@
 
 
 /** Hash functions that are better than the trivial function std::hash.
- * (when aggregated by the visitor ID, the performance increase is more than 5 times)
+ *
+ * Example: when aggregated by the visitor ID, the performance increase is more than 5 times.
+ * This is because of following reasons:
+ * - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
+ * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
+ * - traffic is non-uniformly distributed across a day;
+ * - we are using open-addressing linear probing hash tables that are most critical to hash function quality,
+ *   and trivial hash function gives disasterous results.
  */
 
-/** Taken from MurmurHash.
+/** Taken from MurmurHash. This is Murmur finalizer.
  * Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
  */
 inline DB::UInt64 intHash64(DB::UInt64 x)
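For reference, the "Murmur finalizer" mentioned in the new comment is the published MurmurHash3 fmix64 step; a standalone sketch (using plain uint64_t instead of DB::UInt64, with a name of my own) looks like this:

    #include <cstdint>

    inline uint64_t murmurFinalizer(uint64_t x)
    {
        x ^= x >> 33;
        x *= 0xff51afd7ed558ccdULL;
        x ^= x >> 33;
        x *= 0xc4ceb9fe1a85ec53ULL;
        x ^= x >> 33;
        return x;
    }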
@@ -22,20 +29,21 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
 }
 
 /** CRC32C is not very high-quality as a hash function,
- * according to avalanche and bit independence tests, as well as a small number of bits,
+ * according to avalanche and bit independence tests (see SMHasher software), as well as a small number of bits,
  * but can behave well when used in hash tables,
  * due to high speed (latency 3 + 1 clock cycle, throughput 1 clock cycle).
  * Works only with SSE 4.2 support.
- * Used asm instead of intrinsics, so you do not have to build the entire project with -msse4.
  */
+#if __SSE4_2__
+#include <nmmintrin.h>
+#endif
 
 inline DB::UInt64 intHashCRC32(DB::UInt64 x)
 {
-#if defined(__x86_64__)
-    DB::UInt64 crc = -1ULL;
-    asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x));
-    return crc;
+#if __SSE4_2__
+    return _mm_crc32_u64(-1ULL, x);
 #else
-    /// On other platforms we do not need CRC32. NOTE This can be confusing.
+    /// On other platforms we do not have CRC32. NOTE This can be confusing.
     return intHash64(x);
 #endif
 }
@@ -128,7 +136,7 @@ struct TrivialHash
 };
 
 
-/** A relatively good non-cryptic hash function from UInt64 to UInt32.
+/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
  * But worse (both in quality and speed) than just cutting intHash64.
  * Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
  *
@@ -136,9 +144,14 @@ struct TrivialHash
  * This change did not affect the smhasher test results.
  *
  * It is recommended to use different salt for different tasks.
  * That was the case that in the database values were sorted by hash (for low-quality pseudo-random spread),
  * and in another place, in the aggregate function, the same hash was used in the hash table,
  * as a result, this aggregate function was monstrously slowed due to collisions.
+ *
+ * NOTE Salting is far from perfect, because it commutes with first steps of calculation.
+ *
+ * NOTE As mentioned, this function is slower than intHash64.
+ * But occasionaly, it is faster, when written in a loop and loop is vectorized.
  */
 template <DB::UInt64 salt>
 inline DB::UInt32 intHash32(DB::UInt64 key)
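For context, the function on the cited page is Thomas Wang's hash6432shift; a sketch with the salt mixed in up front might look like the following. This is my reconstruction for illustration only; the actual ClickHouse intHash32 may differ, for example in the exact shift operations used.

    #include <cstdint>

    template <uint64_t salt>
    inline uint32_t intHash32Sketch(uint64_t key)
    {
        key ^= salt;
        key = (~key) + (key << 18);
        key = key ^ (key >> 31);
        key = key * 21;
        key = key ^ (key >> 11);
        key = key + (key << 6);
        key = key ^ (key >> 22);
        return static_cast<uint32_t>(key);
    }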
@@ -66,7 +66,7 @@ struct HashMapCell
     /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
     static constexpr bool need_zero_value_storage = true;
 
-    /// Whether the cell is removed.
+    /// Whether the cell was deleted.
     bool isDeleted() const { return false; }
 
     void setMapped(const value_type & value_) { value.second = value_.second; }
@@ -46,9 +46,9 @@ namespace ErrorCodes
 
 /** The state of the hash table that affects the properties of its cells.
  * Used as a template parameter.
- * For example, there is an implementation of an instantly cleared hash table - ClearableHashMap.
+ * For example, there is an implementation of an instantly clearable hash table - ClearableHashMap.
  * For it, each cell holds the version number, and in the hash table itself is the current version.
- * When cleaning, the current version simply increases; All cells with a mismatching version are considered empty.
+ * When clearing, the current version simply increases; All cells with a mismatching version are considered empty.
  * Another example: for an approximate calculation of the number of unique visitors, there is a hash table for UniquesHashSet.
  * It has the concept of "degree". At each overflow, cells with keys that do not divide by the corresponding power of the two are deleted.
  */
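A minimal sketch of the versioning trick described here (simplified and hypothetical, not the real ClearableHashMap): each cell remembers the version it was written under, and clear() just bumps the table's current version, so older cells are treated as empty without touching them.

    #include <cstdint>
    #include <vector>

    struct VersionedCell
    {
        uint32_t version = 0;   /// 0 means the cell was never written.
        uint64_t key = 0;
    };

    struct ClearableSetSketch
    {
        uint32_t current_version = 1;
        std::vector<VersionedCell> cells = std::vector<VersionedCell>(1024);

        bool occupied(const VersionedCell & cell) const { return cell.version == current_version; }
        void clear() { ++current_version; }   /// O(1): stale cells become "empty" automatically.
    };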
@@ -77,8 +77,8 @@ void set(T & x) { x = 0; }
 };
 
 
-/** Compile-time cell interface of the hash table.
- * Different cells are used to implement different hash tables.
+/** Compile-time interface for cell of the hash table.
+ * Different cell types are used to implement different hash tables.
  * The cell must contain a key.
  * It can also contain a value and arbitrary additional data
  * (example: the stored hash value; version number for ClearableHashMap).
@@ -95,7 +95,7 @@ struct HashTableCell
 
     /// Create a cell with the given key / key and value.
     HashTableCell(const Key & key_, const State & state) : key(key_) {}
     /// HashTableCell(const value_type & value_, const State & state) : key(value_) {}
 
     /// Get what the value_type of the container will be.
     value_type & getValue() { return key; }
@@ -126,13 +126,13 @@ struct HashTableCell
     /// Set the key value to zero.
     void setZero() { ZeroTraits::set(key); }
 
-    /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
+    /// Do the hash table need to store the zero key separately (that is, can a zero key be inserted into the hash table).
     static constexpr bool need_zero_value_storage = true;
 
     /// Whether the cell is deleted.
     bool isDeleted() const { return false; }
 
-    /// Set the displayed value, if any (for HashMap), to the corresponding `value`.
+    /// Set the mapped value, if any (for HashMap), to the corresponding `value`.
     void setMapped(const value_type & value) {}
 
     /// Serialization, in binary and text form.
@@ -145,7 +145,7 @@ struct HashTableCell
 };
 
 
-/** Determines the size of the hash table, and when and how many times it should be resized.
+/** Determines the size of the hash table, and when and how much it should be resized.
  */
 template <size_t initial_size_degree = 8>
 struct HashTableGrower
@@ -195,7 +195,7 @@ struct HashTableGrower
 /** When used as a Grower, it turns a hash table into something like a lookup table.
  * It remains non-optimal - the cells store the keys.
  * Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
- * TODO Make a full lookup table.
+ * TODO Make a proper lookup table.
  */
 template <size_t key_bits>
 struct HashTableFixedGrower
@@ -212,7 +212,7 @@ struct HashTableFixedGrower
 };
 
 
-/** If you want to store the null key separately - a place to store it. */
+/** If you want to store the zero key separately - a place to store it. */
 template <bool need_zero_value_storage, typename Cell>
 struct ZeroValueStorage;
 
@@ -272,7 +272,7 @@ protected:
     using cell_type = Cell;
 
     size_t m_size = 0; /// Amount of elements
-    Cell * buf; /// A piece of memory for all elements except the element with key 0.
+    Cell * buf; /// A piece of memory for all elements except the element with zero key.
     Grower grower;
 
 #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
@@ -334,7 +334,7 @@ protected:
 
         /** In case of exception for the object to remain in the correct state,
           * changing the variable `grower` (which determines the buffer size of the hash table)
-          * postpone for a moment after a real buffer change.
+          * is postponed for a moment after a real buffer change.
           * The temporary variable `new_grower` is used to determine the new size.
           */
         Grower new_grower = grower;
@@ -410,7 +410,7 @@ protected:
         memcpy(&buf[place_value], &x, sizeof(x));
         x.setZero();
 
-        /// Then the elements that previously were in conflict with this can move to the old place.
+        /// Then the elements that previously were in collision with this can move to the old place.
     }
 
 
@@ -638,7 +638,7 @@ protected:
     }
 
 
-    /// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist, return the iterator to the cell.
+    /// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
     void ALWAYS_INLINE emplaceNonZero(Key x, iterator & it, bool & inserted, size_t hash_value)
     {
         size_t place_value = findCell(x, hash_value, grower.place(hash_value));
@@ -664,8 +664,8 @@ protected:
         }
         catch (...)
         {
-            /** If you do not do it, then there will be problems.
-              * After all, there remains a key, but uninitialized mapped-value,
+            /** If we have not resized successfully, then there will be problems.
+              * There remains a key, but uninitialized mapped-value,
               * which, perhaps, can not even be called a destructor.
               */
             --m_size;
@@ -698,7 +698,7 @@ public:
      * return the iterator to a position that can be used for `placement new` of value,
      * as well as the flag - whether a new key was inserted.
      *
-     * You are required to make `placement new` of value if you inserted a new key,
+     * You have to make `placement new` of value if you inserted a new key,
      * since when destroying a hash table, it will call the destructor!
      *
      * Example usage:
@@ -215,7 +215,7 @@ public:
      * return the iterator to a position that can be used for `placement new` of value,
      * as well as the flag - whether a new key was inserted.
      *
      * You have to make `placement new` of value if you inserted a new key,
      * since when destroying a hash table, a destructor will be called for it!
      *
      * Example usage:
@@ -224,7 +224,7 @@ public:
      * return the iterator to a position that can be used for `placement new` value,
      * as well as the flag - whether a new key was inserted.
      *
      * You have to make `placement new` values if you inserted a new key,
      * since when destroying a hash table, the destructor will be invoked for it!
      *
      * Example usage:
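The contract these comments describe can be sketched as follows. This is a hedged reconstruction of the usual usage pattern, not the elided example from the header itself; Map stands for any ClickHouse-style hash map (for example HashMap<UInt64, UInt64>) whose emplace has the signature shown in the hunks above.

    #include <new>

    template <typename Map, typename Key, typename Mapped>
    void insertIfAbsent(Map & map, const Key & key, const Mapped & value)
    {
        typename Map::iterator it;
        bool inserted;
        map.emplace(key, it, inserted);
        if (inserted)
            new (&it->second) Mapped(value);   /// construct the mapped value only for a newly inserted key
    }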
@@ -22,7 +22,7 @@ public:
         return false;
     }
 
     /// Maximum number of unique values to which the correction should apply
     /// from the LinearCounting algorithm.
     static double getThreshold()
     {
@@ -83,7 +83,7 @@ private:
 };
 
 /** Trivial case of HyperLogLogBiasEstimator: used if we do not want to fix
  * error. This has meaning for small values of the accuracy parameter, for example 5 or 12.
  * Then the corrections from the original version of the HyperLogLog algorithm are applied.
  * See "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
  * (P. Flajolet et al., AOFA '07: Proceedings of the 2007 International Conference on Analysis
@@ -8,7 +8,7 @@
 
 /** The simplest cache for a free function.
  * You can also pass a static class method or lambda without capturing.
  * The size is unlimited. Values are not obsolete.
  * To synchronize, use mutex.
  * Suitable only for the simplest cases.
  *
@@ -4,6 +4,10 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 
+#if __SSE4_2__
+#include <nmmintrin.h>
+#endif
+
 
 namespace DB
 {
@@ -42,15 +46,15 @@ struct UInt128Hash
     }
 };
 
-#if defined(__x86_64__)
+#if __SSE4_2__
 
 struct UInt128HashCRC32
 {
     size_t operator()(UInt128 x) const
     {
         UInt64 crc = -1ULL;
-        asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.first));
-        asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.second));
+        crc = _mm_crc32_u64(crc, x.first);
+        crc = _mm_crc32_u64(crc, x.second);
         return crc;
     }
 };
@@ -122,17 +126,17 @@ struct UInt256Hash
     }
 };
 
-#if defined(__x86_64__)
+#if __SSE4_2__
 
 struct UInt256HashCRC32
 {
     size_t operator()(UInt256 x) const
     {
         UInt64 crc = -1ULL;
-        asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.a));
-        asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.b));
-        asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.c));
-        asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x.d));
+        crc = _mm_crc32_u64(crc, x.a);
+        crc = _mm_crc32_u64(crc, x.b);
+        crc = _mm_crc32_u64(crc, x.c);
+        crc = _mm_crc32_u64(crc, x.d);
         return crc;
     }
 };
@@ -19,7 +19,7 @@
  * - when initializing, fill in an open-addressing linear probing hash table of the form
  *   hash from the bigram of needle -> the position of this bigram in needle + 1.
  *   (one is added only to distinguish zero offset from an empty cell)
  * - the keys are not stored in the hash table, only the values are stored;
  * - bigrams can be inserted several times if they occur in the needle several times;
  * - when searching, take from haystack bigram, which should correspond to the last bigram of needle (comparing from the end);
  * - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise;
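A rough sketch of the initialization step described in this comment. The table size, the hash of the bigram, and the names are hypothetical; the real searcher differs in details.

    #include <cstdint>
    #include <cstring>
    #include <string>

    static constexpr size_t hash_size = 64 * 1024;
    static uint16_t hash_table[hash_size];   /// offset of the bigram in needle, plus one; 0 means an empty cell

    void fillBigramTable(const std::string & needle)
    {
        std::memset(hash_table, 0, sizeof(hash_table));
        for (size_t i = 0; i + 2 <= needle.size(); ++i)
        {
            uint16_t bigram;
            std::memcpy(&bigram, needle.data() + i, 2);
            size_t cell = bigram % hash_size;          /// trivial hash of the bigram
            while (hash_table[cell])                   /// linear probing; repeated bigrams are simply inserted again
                cell = (cell + 1) % hash_size;
            hash_table[cell] = uint16_t(i + 1);        /// +1, to distinguish offset 0 from an empty cell
        }
    }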
@@ -15,7 +15,7 @@ struct BlockInfo
 {
     /** is_overflows:
       * After running GROUP BY ... WITH TOTALS with the max_rows_to_group_by and group_by_overflow_mode = 'any' settings,
       * a row is inserted in the separate block with aggregated values that have not passed max_rows_to_group_by.
       * If it is such a block, then is_overflows is set to true for it.
       */
 
@@ -36,10 +36,10 @@ struct BlockInfo
 
 #undef DECLARE_FIELD
 
     /// Write the values in binary form. NOTE: You could use protobuf, but it would be overkill for this case.
     void write(WriteBuffer & out) const;
 
     /// Read the values in binary form.
     void read(ReadBuffer & in);
 };
 
@@ -27,7 +27,7 @@
 #define DEFAULT_MAX_COMPRESS_BLOCK_SIZE 1048576
 
 /** Which blocks by default read the data (by number of rows).
  * Smaller values give better cache locality, less consumption of RAM, but more overhead to process the query.
  */
 #define DEFAULT_BLOCK_SIZE 65536
 
@@ -171,17 +171,7 @@ struct StringRefHash64
 
 #if __SSE4_2__
 
-#ifdef __SSE4_1__
 #include <smmintrin.h>
-#else
-
-inline UInt64 _mm_crc32_u64(UInt64 crc, UInt64 value)
-{
-    asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
-    return crc;
-}
-
-#endif
 
 /// Parts are taken from CityHash.
 
@@ -9,7 +9,7 @@
 namespace DB
 {
 
 /** Data types for representing values from a database in RAM.
  */
 
 STRONG_TYPEDEF(char, Null);
@@ -41,7 +41,7 @@ public:
 
     DataTypePtr clone() const override { return std::make_shared<DataTypeAggregateFunction>(function, argument_types, parameters); }
 
     /// NOTE These two functions for serializing single values are incompatible with the functions below.
     void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
     void deserializeBinary(Field & field, ReadBuffer & istr) const override;
 
@@ -6,7 +6,7 @@
 namespace DB
 {
 
 /** The data type corresponding to the set of values in the IN section.
  * Used only as an intermediate option when evaluating expressions.
  */
 class DataTypeSet final : public IDataTypeDummy
@@ -26,7 +26,7 @@ namespace DB
  *
  * cloud_path - the path to the "cloud"; There may be several different independent clouds
  *     /table_definitions - set of unique table definitions so you do not write them many times for a large number of tables
  *         /hash128 -> sql - mapping: hash from table definition (identifier) -> table definition itself as CREATE query
  *     /tables - list of tables
  *         /database_name - name of the database
  *             /name_hash_mod -> compressed_table_list
@@ -28,7 +28,7 @@ public:
         QueryProcessingStage::Enum & processed_stage,
         size_t max_block_size,
         unsigned threads,
         size_t * inout_part_index, /// If not nullptr, from this counter values are taken for the virtual column _part_index.
         Int64 max_block_number_to_read) const;
 
 private:
@@ -136,7 +136,7 @@ public:
 private:
     void init();
 
     /** If `permutation` is given, it rearranges the values in the columns when writing.
      * This is necessary to not keep the whole block in the RAM to sort it.
      */
     void writeImpl(const Block & block, const IColumn::Permutation * permutation);
@@ -63,7 +63,7 @@ struct ReplicatedMergeTreeLogEntryData
     /// The name of resulting part.
     /// For DROP_RANGE, the name of a non-existent part. You need to remove all the parts covered by it.
     String new_part_name;
     String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks /).
 
     Strings parts_to_merge;
     bool deduplicate = false; /// Do deduplicate on merge
@@ -14,7 +14,7 @@ class StorageReplicatedMergeTree;
 
 
 /** Initializes ZK session.
  * Exposes ephemeral nodes. It sets the node values that are required for replica detection.
  * Starts participation in the leader selection. Starts all background threads.
  * Then monitors whether the session has expired. And if it expired, it will reinitialize it.
  */