mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Fixed translation errors; miscellaneous changes [#CLICKHOUSE-3].
This commit is contained in:
parent
ff02af9804
commit
7bf7242ad3
@ -23,7 +23,7 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType &
|
||||
|
||||
}
|
||||
|
||||
/** For a small number of keys - an array of fixed size "on the stack."
|
||||
/** For a small number of keys - an array of fixed size "on the stack".
|
||||
* For the average, HashSet is allocated.
|
||||
* For large, HyperLogLog is allocated.
|
||||
*/
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
/** Hash functions that are better than the trivial function std::hash.
|
||||
*
|
||||
* Example: when aggregated by the visitor ID, the performance increase is more than 5 times.
|
||||
* Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
|
||||
* This is because of following reasons:
|
||||
* - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
|
||||
* - in typical implementation of standard library, hash function for integers is trivial and just uses lower bits;
|
||||
|
@ -695,7 +695,7 @@ public:
|
||||
|
||||
|
||||
/** Insert the key,
|
||||
* return the iterator to a position that can be used for `placement new` of value,
|
||||
* return an iterator to a position that can be used for `placement new` of value,
|
||||
* as well as the flag - whether a new key was inserted.
|
||||
*
|
||||
* You have to make `placement new` of value if you inserted a new key,
|
||||
|
@ -212,7 +212,7 @@ public:
|
||||
|
||||
|
||||
/** Insert the key,
|
||||
* return the iterator to a position that can be used for `placement new` of value,
|
||||
* return an iterator to a position that can be used for `placement new` of value,
|
||||
* as well as the flag - whether a new key was inserted.
|
||||
*
|
||||
* You have to make `placement new` of value if you inserted a new key,
|
||||
|
@ -9,9 +9,9 @@
|
||||
*
|
||||
* Usually works a little slower than a simple hash table.
|
||||
* However, it has advantages in some cases:
|
||||
* - if you need to measure two hash tables together, then you can easily parallelize them by buckets;
|
||||
* - lag during resizes is spread, since the small hash tables will be resized separately;
|
||||
* - in theory, the cache resize is local in a larger range of sizes.
|
||||
* - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
|
||||
* - delay during resizes is amortized, since the small hash tables will be resized separately;
|
||||
* - in theory, resizes are cache-local in a larger range of sizes.
|
||||
*/
|
||||
|
||||
template <size_t initial_size_degree = 8>
|
||||
@ -52,7 +52,7 @@ public:
|
||||
|
||||
size_t hash(const Key & x) const { return Hash::operator()(x); }
|
||||
|
||||
/// NOTE Bad for hash tables for more than 2^32 cells.
|
||||
/// NOTE Bad for hash tables with more than 2^32 cells.
|
||||
static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
|
||||
|
||||
protected:
|
||||
@ -95,7 +95,7 @@ public:
|
||||
{
|
||||
typename Source::const_iterator it = src.begin();
|
||||
|
||||
/// It is assumed that the zero key (stored separately) when iterating is first.
|
||||
/// It is assumed that the zero key (stored separately) is first in iteration order.
|
||||
if (it != src.end() && it.getPtr()->isZero(src))
|
||||
{
|
||||
insert(*it);
|
||||
@ -221,7 +221,7 @@ public:
|
||||
|
||||
|
||||
/** Insert the key,
|
||||
* return the iterator to a position that can be used for `placement new` value,
|
||||
* return an iterator to a position that can be used for `placement new` of value,
|
||||
* as well as the flag - whether a new key was inserted.
|
||||
*
|
||||
* You have to make `placement new` values if you inserted a new key,
|
||||
|
@ -9,7 +9,7 @@ namespace DB
|
||||
{
|
||||
|
||||
|
||||
/** For a small number of keys - an array of fixed size "on the stack."
|
||||
/** For a small number of keys - an array of fixed size "on the stack".
|
||||
* For large, HyperLogLog is allocated.
|
||||
* See also the more practical implementation in CombinedCardinalityEstimator.h,
|
||||
* where a hash table is also used for medium-sized sets.
|
||||
|
@ -3,8 +3,8 @@
|
||||
#include <Common/CounterInFile.h>
|
||||
|
||||
|
||||
/** Lets you receive an auto-increment number, storing it in a file.
|
||||
* Designed for rare calls (not designed for performance).
|
||||
/** Allows to get an auto-increment number, storing it in a file.
|
||||
* Intended for rare calls (not designed for performance).
|
||||
*/
|
||||
class Increment
|
||||
{
|
||||
@ -39,13 +39,13 @@ public:
|
||||
return getBunch(0, create_if_need);
|
||||
}
|
||||
|
||||
/** Get the next number and increase the count by `count`.
|
||||
* If the `create_if_need` parameter is not set to true, then
|
||||
* the file should already have a number written (if not - create the file manually with zero).
|
||||
*
|
||||
* To protect against race conditions between different processes, file locks are used.
|
||||
* (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
|
||||
*/
|
||||
/** Get the next number and increase the counter by `count`.
|
||||
* If the `create_if_need` parameter is not set to true, then
|
||||
* the file should already have a number written (if not - create the file manually with zero).
|
||||
*
|
||||
* To protect against race conditions between different processes, file locks are used.
|
||||
* (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
|
||||
*/
|
||||
UInt64 getBunch(UInt64 count, bool create_if_need = false)
|
||||
{
|
||||
return static_cast<UInt64>(counter.add(static_cast<Int64>(count), create_if_need) - count + 1);
|
||||
|
@ -4,10 +4,11 @@
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Apply the macros from the config in the line.
|
||||
/** Apply substitutions from the macros in config to the string.
|
||||
*/
|
||||
class Macros
|
||||
{
|
||||
|
@ -102,10 +102,10 @@ public:
|
||||
};
|
||||
|
||||
|
||||
/** The MemoryTracker object is quite difficult to drag to all places where significant amounts of memory are allocated.
|
||||
* Therefore, a thread-local pointer to used MemoryTracker or nullptr is used, if it does not need to be used.
|
||||
* This pointer is set when memory consumption is monitored in this thread.
|
||||
* So, you just need to drag it to all the threads that handle one request.
|
||||
/** The MemoryTracker object is quite difficult to pass to all places where significant amounts of memory are allocated.
|
||||
* Therefore, a thread-local pointer to used MemoryTracker is set, or nullptr if MemoryTracker does not need to be used.
|
||||
* This pointer is set when memory consumption is monitored in current thread.
|
||||
* So, you just need to pass it to all the threads that handle one request.
|
||||
*/
|
||||
extern __thread MemoryTracker * current_memory_tracker;
|
||||
|
||||
|
@ -8,8 +8,9 @@
|
||||
#define MIN_LENGTH_FOR_STRSTR 3
|
||||
#define MAX_SUBPATTERNS 5
|
||||
|
||||
template <bool b>
|
||||
void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
|
||||
template <bool thread_safe>
|
||||
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
const std::string & regexp,
|
||||
std::string & required_substring,
|
||||
bool & is_trivial,
|
||||
@ -20,7 +21,8 @@ void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
* a string outside parentheses,
|
||||
* in which all metacharacters are escaped,
|
||||
* and also if there are no '|' outside the brackets,
|
||||
* and also avoid substrings of the form `http://` or `www`.
|
||||
* and also avoid substrings of the form `http://` or `www` and some other
|
||||
* (this is the hack for typical use case in Yandex.Metrica).
|
||||
*/
|
||||
const char * begin = regexp.data();
|
||||
const char * pos = begin;
|
||||
@ -32,9 +34,9 @@ void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
bool has_alternative_on_depth_0 = false;
|
||||
|
||||
/// Substring with a position.
|
||||
typedef std::pair<std::string, size_t> Substring;
|
||||
using Substring = std::pair<std::string, size_t>;
|
||||
using Substrings = std::vector<Substring>;
|
||||
|
||||
typedef std::vector<Substring> Substrings;
|
||||
Substrings trivial_substrings(1);
|
||||
Substring * last_substring = &trivial_substrings.back();
|
||||
|
||||
@ -157,7 +159,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
++pos;
|
||||
break;
|
||||
|
||||
/// Quantifiers that allow a zero number.
|
||||
/// Quantifiers that allow a zero number of occurrences.
|
||||
case '{':
|
||||
in_curly_braces = true;
|
||||
case '?': case '*':
|
||||
@ -208,7 +210,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
{
|
||||
if (((it->second == 0 && candidate_it->second != 0)
|
||||
|| ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
|
||||
/// Tuning for the domain
|
||||
/// Tuning for typical usage domain
|
||||
&& (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
|
||||
&& (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
|
||||
&& (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
|
||||
@ -241,12 +243,12 @@ void OptimizedRegularExpressionImpl<b>::analyze(
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
|
||||
template <bool thread_safe>
|
||||
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
|
||||
{
|
||||
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);
|
||||
|
||||
/// 3 options are supported
|
||||
/// Only the following three options are supported
|
||||
if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
|
||||
throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");
|
||||
|
||||
@ -280,8 +282,8 @@ OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::str
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size) const
|
||||
template <bool thread_safe>
|
||||
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
@ -309,8 +311,8 @@ bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subje
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, Match & match) const
|
||||
template <bool thread_safe>
|
||||
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
@ -357,8 +359,8 @@ bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subje
|
||||
}
|
||||
|
||||
|
||||
template <bool b>
|
||||
unsigned OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
|
||||
template <bool thread_safe>
|
||||
unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
|
||||
{
|
||||
matches.clear();
|
||||
|
||||
|
@ -24,7 +24,7 @@ namespace DB
|
||||
* To be more precise - for use in ColumnVector.
|
||||
* It differs from std::vector in that it does not initialize the elements.
|
||||
*
|
||||
* Made uncopable so that there are no random copies. You can copy the data using `assign` method.
|
||||
* Made noncopyable so that there are no accidental copies. You can copy the data using `assign` method.
|
||||
*
|
||||
* Only part of the std::vector interface is supported.
|
||||
*
|
||||
@ -40,20 +40,20 @@ template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocato
|
||||
class PODArray : private boost::noncopyable, private TAllocator /// empty base optimization
|
||||
{
|
||||
private:
|
||||
/// Round padding up to an integer number of elements to simplify arithmetic.
|
||||
/// Round padding up to a whole number of elements to simplify arithmetic.
|
||||
static constexpr size_t pad_right = (pad_right_ + sizeof(T) - 1) / sizeof(T) * sizeof(T);
|
||||
|
||||
char * c_start = nullptr;
|
||||
char * c_end = nullptr;
|
||||
char * c_start = nullptr;
|
||||
char * c_end = nullptr;
|
||||
char * c_end_of_storage = nullptr; /// Does not include pad_right.
|
||||
|
||||
T * t_start() { return reinterpret_cast<T *>(c_start); }
|
||||
T * t_end() { return reinterpret_cast<T *>(c_end); }
|
||||
T * t_end_of_storage() { return reinterpret_cast<T *>(c_end_of_storage); }
|
||||
T * t_start() { return reinterpret_cast<T *>(c_start); }
|
||||
T * t_end() { return reinterpret_cast<T *>(c_end); }
|
||||
T * t_end_of_storage() { return reinterpret_cast<T *>(c_end_of_storage); }
|
||||
|
||||
const T * t_start() const { return reinterpret_cast<const T *>(c_start); }
|
||||
const T * t_end() const { return reinterpret_cast<const T *>(c_end); }
|
||||
const T * t_end_of_storage() const { return reinterpret_cast<const T *>(c_end_of_storage); }
|
||||
const T * t_start() const { return reinterpret_cast<const T *>(c_start); }
|
||||
const T * t_end() const { return reinterpret_cast<const T *>(c_end); }
|
||||
const T * t_end_of_storage() const { return reinterpret_cast<const T *>(c_end_of_storage); }
|
||||
|
||||
/// The amount of memory occupied by the num_elements of the elements.
|
||||
static size_t byte_size(size_t num_elements) { return num_elements * sizeof(T); }
|
||||
@ -173,16 +173,16 @@ public:
|
||||
const T & operator[] (size_t n) const { return t_start()[n]; }
|
||||
|
||||
T & front() { return t_start()[0]; }
|
||||
T & back() { return t_end()[-1]; }
|
||||
T & back() { return t_end()[-1]; }
|
||||
const T & front() const { return t_start()[0]; }
|
||||
const T & back() const { return t_end()[-1]; }
|
||||
|
||||
iterator begin() { return t_start(); }
|
||||
iterator end() { return t_end(); }
|
||||
const_iterator begin() const { return t_start(); }
|
||||
const_iterator end() const { return t_end(); }
|
||||
const_iterator cbegin() const { return t_start(); }
|
||||
const_iterator cend() const { return t_end(); }
|
||||
iterator begin() { return t_start(); }
|
||||
iterator end() { return t_end(); }
|
||||
const_iterator begin() const { return t_start(); }
|
||||
const_iterator end() const { return t_end(); }
|
||||
const_iterator cbegin() const { return t_start(); }
|
||||
const_iterator cend() const { return t_end(); }
|
||||
|
||||
void reserve(size_t n)
|
||||
{
|
||||
@ -209,7 +209,7 @@ public:
|
||||
c_end = c_start + byte_size(n);
|
||||
}
|
||||
|
||||
/// Same as resize, but zeros new elements.
|
||||
/// Same as resize, but zeroes new elements.
|
||||
void resize_fill(size_t n)
|
||||
{
|
||||
size_t old_size = size();
|
||||
@ -261,7 +261,7 @@ public:
|
||||
c_end -= byte_size(1);
|
||||
}
|
||||
|
||||
/// Do not insert a piece of yourself into the array. Because with the resize, the iterators on themselves can be invalidated.
|
||||
/// Do not insert a piece of the array into itself: the resize may invalidate the iterators that point into it.
|
||||
template <typename It1, typename It2>
|
||||
void insert(It1 from_begin, It2 from_end)
|
||||
{
|
||||
|
@ -8,8 +8,17 @@
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/** A class from which you can inherit and get a pool of something. Used for database connection pools.
|
||||
* The heir must provide a method for creating a new object to place in the pool.
|
||||
* Descendant class must provide a method for creating a new object to place in the pool.
|
||||
*/
|
||||
|
||||
template <typename TObject>
|
||||
@ -63,27 +72,27 @@ public:
|
||||
Entry() {} /// For deferred initialization.
|
||||
|
||||
/** The `Entry` object protects the resource from being used by another thread.
|
||||
* The following methods are forbidden for `rvalue`, so you can not write a similar to
|
||||
*
|
||||
* auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
|
||||
* q.execute (); // Someone else can use this Connection
|
||||
*/
|
||||
* The following methods are forbidden for `rvalue`, so you cannot write code like this:
|
||||
*
|
||||
* auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
|
||||
* q.execute (); // Someone else can use this Connection
|
||||
*/
|
||||
Object * operator->() && = delete;
|
||||
const Object * operator->() const && = delete;
|
||||
Object & operator*() && = delete;
|
||||
const Object & operator*() const && = delete;
|
||||
|
||||
Object * operator->() & { return &*data->data.object; }
|
||||
const Object * operator->() const & { return &*data->data.object; }
|
||||
Object & operator*() & { return *data->data.object; }
|
||||
const Object & operator*() const & { return *data->data.object; }
|
||||
Object * operator->() & { return &*data->data.object; }
|
||||
const Object * operator->() const & { return &*data->data.object; }
|
||||
Object & operator*() & { return *data->data.object; }
|
||||
const Object & operator*() const & { return *data->data.object; }
|
||||
|
||||
bool isNull() const { return data == nullptr; }
|
||||
|
||||
PoolBase * getPool() const
|
||||
{
|
||||
if (!data)
|
||||
throw DB::Exception("attempt to get pool from uninitialized entry");
|
||||
throw DB::Exception("Attempt to get pool from uninitialized entry", DB::ErrorCodes::LOGICAL_ERROR);
|
||||
return &data->data.pool;
|
||||
}
|
||||
|
||||
@ -95,7 +104,7 @@ public:
|
||||
|
||||
virtual ~PoolBase() {}
|
||||
|
||||
/** Allocates the object for the job. With timeout < 0, the timeout is infinite. */
|
||||
/** Allocates the object. Waits up to 'timeout' for a free object in the pool. With 'timeout' < 0, the timeout is infinite. */
|
||||
Entry get(Poco::Timespan::TimeDiff timeout)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
@ -137,7 +146,7 @@ private:
|
||||
/** Pool. */
|
||||
Objects items;
|
||||
|
||||
/** Block to access the pool. */
|
||||
/** Lock to access the pool. */
|
||||
std::mutex mutex;
|
||||
std::condition_variable available;
|
||||
|
||||
@ -151,7 +160,7 @@ protected:
|
||||
items.reserve(max_items);
|
||||
}
|
||||
|
||||
/** Creates a new object to put in the pool. */
|
||||
/** Creates a new object to put into the pool. */
|
||||
virtual ObjectPtr allocObject() = 0;
|
||||
};
|
||||
|
||||
|
@ -13,10 +13,10 @@
|
||||
#include <Core/Defines.h>
|
||||
|
||||
|
||||
/** Bitwise sort, has the following functionality:
|
||||
/** Radix sort, has the following functionality:
|
||||
* Can sort unsigned, signed numbers, and floats.
|
||||
* Can sort an array of fixed length elements that contain something else besides the key.
|
||||
* Customizable digit size.
|
||||
* Customizable radix size.
|
||||
*
|
||||
* LSB, stable.
|
||||
* NOTE For some applications it makes sense to add MSB-radix-sort,
|
||||
@ -49,7 +49,7 @@ struct RadixSortMallocAllocator
|
||||
template <typename KeyBits>
|
||||
struct RadixSortFloatTransform
|
||||
{
|
||||
/// Is it worth writing the result in memory, or is it better to do it every time again?
|
||||
/// Is it worth writing the result in memory, or is it better to do calculation every time again?
|
||||
static constexpr bool transform_is_simple = false;
|
||||
|
||||
static KeyBits forward(KeyBits x)
|
||||
@ -74,7 +74,7 @@ struct RadixSortFloatTraits
|
||||
/// The type to which the key is transformed to do bit operations. This UInt is the same size as the key.
|
||||
using KeyBits = typename std::conditional<sizeof(Float) == 8, uint64_t, uint32_t>::type;
|
||||
|
||||
static constexpr size_t PART_SIZE_BITS = 8; /// With what pieces of the key, it bits, to do one pass - reshuffle of the array.
|
||||
static constexpr size_t PART_SIZE_BITS = 8; /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.
|
||||
|
||||
/// Converting a key into KeyBits is such that the order relation over the key corresponds to the order relation over KeyBits.
|
||||
using Transform = RadixSortFloatTransform<KeyBits>;
|
||||
@ -95,7 +95,7 @@ struct RadixSortIdentityTransform
|
||||
static constexpr bool transform_is_simple = true;
|
||||
|
||||
static KeyBits forward(KeyBits x) { return x; }
|
||||
static KeyBits backward(KeyBits x) { return x; }
|
||||
static KeyBits backward(KeyBits x) { return x; }
|
||||
};
|
||||
|
||||
|
||||
@ -105,7 +105,7 @@ struct RadixSortSignedTransform
|
||||
static constexpr bool transform_is_simple = true;
|
||||
|
||||
static KeyBits forward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
|
||||
static KeyBits backward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
|
||||
static KeyBits backward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
|
||||
};
|
||||
|
||||
|
||||
@ -150,7 +150,7 @@ struct RadixSort
|
||||
private:
|
||||
using Element = typename Traits::Element;
|
||||
using Key = typename Traits::Key;
|
||||
using CountType = typename Traits::CountType;
|
||||
using CountType = typename Traits::CountType;
|
||||
using KeyBits = typename Traits::KeyBits;
|
||||
|
||||
static constexpr size_t HISTOGRAM_SIZE = 1 << Traits::PART_SIZE_BITS;
|
||||
@ -174,9 +174,9 @@ public:
|
||||
{
|
||||
/// If the array is smaller than 256, then it is better to use another algorithm.
|
||||
|
||||
/// There are loops of NUM_PASSES. It is very important that they unfold in compile-time.
|
||||
/// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.
|
||||
|
||||
/// For each of the NUM_PASSES bits of the key, consider how many times each value of this piece met.
|
||||
/// For each of the NUM_PASSES bit ranges of the key, count how many times each value of this bit range occurs.
|
||||
CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};
|
||||
|
||||
typename Traits::Allocator allocator;
|
||||
@ -230,6 +230,7 @@ public:
|
||||
}
|
||||
|
||||
/// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
|
||||
/// NOTE Sometimes it will be more optimal to provide non-destructive interface, that will not modify original array.
|
||||
if (NUM_PASSES % 2)
|
||||
memcpy(arr, swap_buffer, size * sizeof(Element));
|
||||
|
||||
|
@ -10,8 +10,8 @@ namespace DB
|
||||
|
||||
|
||||
/** Lets you run the command,
|
||||
* read it stdout, stderr, write to stdin,
|
||||
* wait for completion.
|
||||
* read its stdout and stderr; write to stdin;
|
||||
* wait for completion.
|
||||
*
|
||||
* The implementation is similar to the popen function from POSIX (see libc source code).
|
||||
*
|
||||
@ -20,8 +20,8 @@ namespace DB
|
||||
* with some overcommit settings, if the address space of the process is more than half the amount of available memory.
|
||||
* Also, changing memory maps - a fairly resource-intensive operation.
|
||||
*
|
||||
* The second difference - allows to work simultaneously with stdin, and with stdout, and with stderr running process,
|
||||
* and also find out the code and the completion status.
|
||||
* The second difference - allows to work simultaneously with stdin, and with stdout, and with stderr of running process,
|
||||
* and also to obtain the return code and completion status.
|
||||
*/
|
||||
class ShellCommand
|
||||
{
|
||||
|
@ -7,9 +7,9 @@
|
||||
|
||||
|
||||
/** The simplest cache for a free function.
|
||||
* You can also pass a static class method or lambda without capturing.
|
||||
* The size is unlimited. Values are not obsolete.
|
||||
* To synchronize, use mutex.
|
||||
* You can also pass a static class method or lambda without captures.
|
||||
* The size is unlimited. Values are stored permanently and never evicted.
|
||||
* Mutex is used for synchronization.
|
||||
* Suitable only for the simplest cases.
|
||||
*
|
||||
* Usage
|
||||
|
@ -3,17 +3,17 @@
|
||||
/** SipHash is a fast cryptographic hash function for short strings.
|
||||
* Taken from here: https://www.131002.net/siphash/
|
||||
*
|
||||
* This is SipHash 2-4 variant.
|
||||
*
|
||||
* Two changes are made:
|
||||
* - returns 128 bits, not 64;
|
||||
* - can also return 128 bits, not only 64;
|
||||
* - done streaming (can be calculated in parts).
|
||||
*
|
||||
* On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
|
||||
* (~ 700 MB/sec, 15 million strings per second)
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <Core/Types.h>
|
||||
#include <common/Types.h>
|
||||
|
||||
#define ROTL(x,b) static_cast<u64>( ((x) << (b)) | ( (x) >> (64 - (b))) )
|
||||
|
||||
@ -30,23 +30,20 @@
|
||||
class SipHash
|
||||
{
|
||||
private:
|
||||
using u64 = DB::UInt64;
|
||||
using u8 = DB::UInt8;
|
||||
|
||||
/// Status.
|
||||
u64 v0;
|
||||
u64 v1;
|
||||
u64 v2;
|
||||
u64 v3;
|
||||
/// State.
|
||||
UInt64 v0;
|
||||
UInt64 v1;
|
||||
UInt64 v2;
|
||||
UInt64 v3;
|
||||
|
||||
/// How many bytes have been processed.
|
||||
u64 cnt;
|
||||
UInt64 cnt;
|
||||
|
||||
/// The current 8 bytes of input data.
|
||||
union
|
||||
{
|
||||
u64 current_word;
|
||||
u8 current_bytes[8];
|
||||
UInt64 current_word;
|
||||
UInt8 current_bytes[8];
|
||||
};
|
||||
|
||||
void finalize()
|
||||
@ -68,7 +65,7 @@ private:
|
||||
|
||||
public:
|
||||
/// Arguments - seed.
|
||||
SipHash(u64 k0 = 0, u64 k1 = 0)
|
||||
SipHash(UInt64 k0 = 0, UInt64 k1 = 0)
|
||||
{
|
||||
/// Initialize the state with some random bytes and seed.
|
||||
v0 = 0x736f6d6570736575ULL ^ k0;
|
||||
@ -80,7 +77,7 @@ public:
|
||||
current_word = 0;
|
||||
}
|
||||
|
||||
void update(const char * data, u64 size)
|
||||
void update(const char * data, UInt64 size)
|
||||
{
|
||||
const char * end = data + size;
|
||||
|
||||
@ -94,7 +91,7 @@ public:
|
||||
++cnt;
|
||||
}
|
||||
|
||||
/// If you still do not have enough bytes to an 8-byte word.
|
||||
/// If we still do not have enough bytes for a full 8-byte word.
|
||||
if (cnt & 7)
|
||||
return;
|
||||
|
||||
@ -108,7 +105,7 @@ public:
|
||||
|
||||
while (data + 8 <= end)
|
||||
{
|
||||
current_word = *reinterpret_cast<const u64 *>(data);
|
||||
current_word = *reinterpret_cast<const UInt64 *>(data);
|
||||
|
||||
v3 ^= current_word;
|
||||
SIPROUND;
|
||||
@ -138,18 +135,18 @@ public:
|
||||
void get128(char * out)
|
||||
{
|
||||
finalize();
|
||||
reinterpret_cast<u64 *>(out)[0] = v0 ^ v1;
|
||||
reinterpret_cast<u64 *>(out)[1] = v2 ^ v3;
|
||||
reinterpret_cast<UInt64 *>(out)[0] = v0 ^ v1;
|
||||
reinterpret_cast<UInt64 *>(out)[1] = v2 ^ v3;
|
||||
}
|
||||
|
||||
void get128(u64 & lo, u64 & hi)
|
||||
void get128(UInt64 & lo, UInt64 & hi)
|
||||
{
|
||||
finalize();
|
||||
lo = v0 ^ v1;
|
||||
hi = v2 ^ v3;
|
||||
}
|
||||
|
||||
u64 get64()
|
||||
UInt64 get64()
|
||||
{
|
||||
finalize();
|
||||
return v0 ^ v1 ^ v2 ^ v3;
|
||||
@ -160,6 +157,7 @@ public:
|
||||
#undef ROTL
|
||||
#undef SIPROUND
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
inline void sipHash128(const char * data, const size_t size, char * out)
|
||||
{
|
||||
@ -168,7 +166,7 @@ inline void sipHash128(const char * data, const size_t size, char * out)
|
||||
hash.get128(out);
|
||||
}
|
||||
|
||||
inline DB::UInt64 sipHash64(const char * data, const size_t size)
|
||||
inline UInt64 sipHash64(const char * data, const size_t size)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(data, size);
|
||||
@ -177,7 +175,7 @@ inline DB::UInt64 sipHash64(const char * data, const size_t size)
|
||||
|
||||
#include <string>
|
||||
|
||||
inline DB::UInt64 sipHash64(const std::string & s)
|
||||
inline UInt64 sipHash64(const std::string & s)
|
||||
{
|
||||
return sipHash64(s.data(), s.size());
|
||||
}
|
||||
|
@ -19,15 +19,14 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNSUPPORTED_PARAMETER;
|
||||
}
|
||||
|
||||
|
||||
/** Variants for finding a substring in a string.
|
||||
* In most cases, less productive than Volnitsky (see Volnitsky.h).
|
||||
/** Variants for searching a substring in a string.
|
||||
* In most cases, performance is less than Volnitsky (see Volnitsky.h).
|
||||
*/
|
||||
|
||||
|
||||
@ -37,7 +36,7 @@ struct StringSearcherBase
|
||||
static constexpr auto n = sizeof(__m128i);
|
||||
const int page_size = getpagesize();
|
||||
|
||||
bool page_safe(const void * const ptr) const
|
||||
bool pageSafe(const void * const ptr) const
|
||||
{
|
||||
return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
|
||||
}
|
||||
@ -55,7 +54,7 @@ class StringSearcher<false, false> : private StringSearcherBase
|
||||
private:
|
||||
using UTF8SequenceBuffer = UInt8[6];
|
||||
|
||||
/// string to be searched for
|
||||
/// substring to be searched for
|
||||
const UInt8 * const needle;
|
||||
const std::size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
@ -135,8 +134,7 @@ public:
|
||||
if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
|
||||
throw DB::Exception{
|
||||
"UTF8 sequences with different lowercase and uppercase lengths are not supported",
|
||||
DB::ErrorCodes::UNSUPPORTED_PARAMETER
|
||||
};
|
||||
DB::ErrorCodes::UNSUPPORTED_PARAMETER};
|
||||
|
||||
cache_actual_len += src_len;
|
||||
if (cache_actual_len < n)
|
||||
@ -165,7 +163,7 @@ public:
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
if (pageSafe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -230,7 +228,7 @@ public:
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
|
||||
@ -249,7 +247,7 @@ public:
|
||||
const auto offset = __builtin_ctz(mask);
|
||||
haystack += offset;
|
||||
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -377,7 +375,7 @@ public:
|
||||
bool compare(const UInt8 * pos) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
if (pageSafe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -429,7 +427,7 @@ public:
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
|
||||
@ -447,7 +445,7 @@ public:
|
||||
const auto offset = __builtin_ctz(mask);
|
||||
haystack += offset;
|
||||
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
|
||||
@ -559,7 +557,7 @@ public:
|
||||
bool compare(const UInt8 * pos) const
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
if (pageSafe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
|
||||
@ -609,7 +607,7 @@ public:
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
/// find first character
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
@ -627,7 +625,7 @@ public:
|
||||
const auto offset = __builtin_ctz(mask);
|
||||
haystack += offset;
|
||||
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
|
||||
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
|
||||
{
|
||||
/// check for first 16 octets
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
@ -694,9 +692,9 @@ using UTF8CaseInsensitiveStringSearcher = StringSearcher<false, false>;
|
||||
|
||||
|
||||
/** Uses functions from libc.
|
||||
* It makes sense to use short strings when cheap initialization is required.
|
||||
* There is no option for register-independent search for UTF-8 strings.
|
||||
* It is required that the end of the lines be zero byte.
|
||||
* It makes sense to use only with short haystacks when cheap initialization is required.
|
||||
* There is no option for case-insensitive search for UTF-8 strings.
|
||||
* It is required that strings are zero-terminated.
|
||||
*/
|
||||
|
||||
struct LibCASCIICaseSensitiveStringSearcher
|
||||
|
@ -1,11 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <time.h> /// nanosleep
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -15,12 +17,12 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/** Allows you to limit the speed of something (in pieces per second) using sleep.
|
||||
/** Allows you to limit the speed of something (in entities per second) using sleep.
|
||||
* Specifics of work:
|
||||
* - only the average speed is considered, from the moment of the first call of `add` function;
|
||||
* if there were periods with low speed, then during some time after them, the speed will be higher;
|
||||
*
|
||||
* Also allows you to set a limit on the maximum number of pieces. If you exceed, an exception is thrown.
|
||||
* Also allows you to set a limit on the maximum number of entities. If exceeded, an exception will be thrown.
|
||||
*/
|
||||
class Throttler
|
||||
{
|
||||
@ -56,7 +58,7 @@ public:
|
||||
|
||||
if (max_speed)
|
||||
{
|
||||
/// How much time would have gone for the speed to become `max_speed`.
|
||||
/// How much time to wait for the average speed to become `max_speed`.
|
||||
UInt64 desired_ns = new_count * 1000000000 / max_speed;
|
||||
|
||||
if (desired_ns > elapsed_ns)
|
||||
@ -65,7 +67,7 @@ public:
|
||||
timespec sleep_ts;
|
||||
sleep_ts.tv_sec = sleep_ns / 1000000000;
|
||||
sleep_ts.tv_nsec = sleep_ns % 1000000000;
|
||||
nanosleep(&sleep_ts, nullptr); /// NOTE Ends early in case of a signal. This is considered normal.
|
||||
nanosleep(&sleep_ts, nullptr); /// NOTE Returns early in case of a signal. This is considered normal.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,10 +16,10 @@ class Context;
|
||||
namespace VirtualColumnUtils
|
||||
{
|
||||
|
||||
/// Calculate the minimum numeric suffix to add to the row so that it is not present in the set
|
||||
/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set
|
||||
String chooseSuffix(const NamesAndTypesList & columns, const String & name);
|
||||
|
||||
/// Calculate the minimum total numeric suffix to add to each row,
|
||||
/// Calculate the minimum total numeric suffix to add to each string,
|
||||
/// so that none is present in the set.
|
||||
String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names);
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Common/StringSearcher.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
#include <Poco/Unicode.h>
|
||||
#include <ext/range.hpp>
|
||||
@ -12,7 +13,7 @@
|
||||
/** Search for a substring in a string by Volnitsky's algorithm
|
||||
* http://volnitsky.com/project/str_search/
|
||||
*
|
||||
* `haystack` and `needle` can contain null bytes.
|
||||
* `haystack` and `needle` can contain zero bytes.
|
||||
*
|
||||
* Algorithm:
|
||||
* - if the `needle` is too small or too large, or the `haystack` is too small, use std::search or memchr;
|
||||
@ -23,7 +24,7 @@
|
||||
* - bigrams can be inserted several times if they occur in the needle several times;
|
||||
* - when searching, take a bigram from the haystack that should correspond to the last bigram of the needle (comparing from the end);
|
||||
* - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise;
|
||||
* - if it did not work, we check the next cell of the hash table from the collision resolution chain;
|
||||
* - if it did not match, we check the next cell of the hash table from the collision resolution chain;
|
||||
* - if not found, skip forward in the haystack by almost the size of the needle (in bytes);
|
||||
*
|
||||
* Unaligned memory access is used.
|
||||
@ -39,34 +40,35 @@ template <typename CRTP>
|
||||
class VolnitskyBase
|
||||
{
|
||||
protected:
|
||||
using offset_t = uint8_t; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
|
||||
using ngram_t = uint16_t; /// n-gram (2 bytes).
|
||||
using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
|
||||
using Ngram = UInt16; /// n-gram (2 bytes).
|
||||
|
||||
const UInt8 * const needle;
|
||||
const size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
/// How far we move if the n-gram from the haystack is not found in the hash table.
|
||||
const size_t step = needle_size - sizeof(ngram_t) + 1;
|
||||
const size_t step = needle_size - sizeof(Ngram) + 1;
|
||||
|
||||
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
|
||||
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
|
||||
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache.
|
||||
offset_t hash[hash_size]; /// Hash table.
|
||||
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
|
||||
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache (of common Intel CPUs).
|
||||
Offset hash[hash_size]; /// Hash table.
|
||||
|
||||
/// min haystack size to use main algorithm instead of fallback
|
||||
static constexpr auto min_haystack_size_for_algorithm = 20000;
|
||||
const bool fallback; /// Do I need to use the fallback algorithm.
|
||||
const bool fallback; /// Do we need to use the fallback algorithm.
|
||||
|
||||
public:
|
||||
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Can not specify.
|
||||
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
|
||||
* If you specify it small enough, the fallback algorithm will be used,
|
||||
* since it is considered that it's useless to waste time initializing the hash table.
|
||||
*/
|
||||
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
|
||||
: needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
|
||||
fallback{
|
||||
needle_size < 2 * sizeof(ngram_t) || needle_size >= std::numeric_limits<offset_t>::max() ||
|
||||
(haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
|
||||
needle_size < 2 * sizeof(Ngram)
|
||||
|| needle_size >= std::numeric_limits<Offset>::max()
|
||||
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
|
||||
{
|
||||
if (fallback)
|
||||
return;
|
||||
@ -74,7 +76,7 @@ public:
|
||||
memset(hash, 0, sizeof(hash));
|
||||
|
||||
/// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
|
||||
for (auto i = static_cast<int>(needle_size - sizeof(ngram_t)); i >= 0; --i)
|
||||
for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
|
||||
self().putNGram(this->needle + i, i + 1, this->needle);
|
||||
}
|
||||
|
||||
@ -91,7 +93,7 @@ public:
|
||||
return self().search_fallback(haystack, haystack_end);
|
||||
|
||||
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
|
||||
const auto * pos = haystack + needle_size - sizeof(ngram_t);
|
||||
const auto * pos = haystack + needle_size - sizeof(Ngram);
|
||||
for (; pos <= haystack_end - needle_size; pos += step)
|
||||
{
|
||||
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
|
||||
@ -119,12 +121,12 @@ protected:
|
||||
CRTP & self() { return static_cast<CRTP &>(*this); }
|
||||
const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }
|
||||
|
||||
static const ngram_t & toNGram(const UInt8 * const pos)
|
||||
static const Ngram & toNGram(const UInt8 * const pos)
|
||||
{
|
||||
return *reinterpret_cast<const ngram_t *>(pos);
|
||||
return *reinterpret_cast<const Ngram *>(pos);
|
||||
}
|
||||
|
||||
void putNGramBase(const ngram_t ngram, const int offset)
|
||||
void putNGramBase(const Ngram ngram, const int offset)
|
||||
{
|
||||
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
|
||||
size_t cell_num = ngram % hash_size;
|
||||
@ -145,7 +147,7 @@ protected:
|
||||
|
||||
union
|
||||
{
|
||||
ngram_t n;
|
||||
Ngram n;
|
||||
Chars chars;
|
||||
};
|
||||
|
||||
@ -260,7 +262,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
|
||||
union
|
||||
{
|
||||
ngram_t n;
|
||||
Ngram n;
|
||||
Chars chars;
|
||||
};
|
||||
|
||||
@ -277,10 +279,12 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
* or intersect with two code points.
|
||||
*
|
||||
* In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
|
||||
* and in the second case - up to four alternatives - fragments of two code points in all combinations of registers.
|
||||
* and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
|
||||
*
|
||||
* It does not take into account the dependence of the transformation between the registers from the locale (for example - Turkish `Ii`)
|
||||
* It does not take into account the dependence of the case transformation on the locale (for example - Turkish `Ii`)
|
||||
* as well as composition / decomposition and other features.
|
||||
*
|
||||
* It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
|
||||
*/
|
||||
|
||||
using Seq = UInt8[6];
|
||||
@ -302,12 +306,12 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
putNGramBase(n, offset);
|
||||
else
|
||||
{
|
||||
/// where is the given ngram in respect to UTF-8 sequence start?
|
||||
/// where is the given ngram with respect to the start of the UTF-8 sequence?
|
||||
const auto seq_ngram_offset = pos - seq_pos;
|
||||
|
||||
Seq seq;
|
||||
|
||||
/// put ngram from lowercase
|
||||
/// put ngram for lowercase
|
||||
utf8.convert(l_u32, seq, sizeof(seq));
|
||||
chars.c0 = seq[seq_ngram_offset];
|
||||
chars.c1 = seq[seq_ngram_offset + 1];
|
||||
@ -326,7 +330,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
/// first sequence may start before u_pos if it is not ASCII
|
||||
auto first_seq_pos = pos;
|
||||
UTF8::syncBackward(first_seq_pos, begin);
|
||||
/// where is the given ngram in respect to the first UTF-8 sequence start?
|
||||
/// where is the given ngram with respect to the start of the first UTF-8 sequence?
|
||||
const auto seq_ngram_offset = pos - first_seq_pos;
|
||||
|
||||
const auto first_u32 = utf8.convert(first_seq_pos);
|
||||
|
@ -4,11 +4,11 @@
|
||||
#include <IO/WriteBuffer.h>
|
||||
|
||||
|
||||
/// Displays the transmitted size in bytes as 123.45 GiB.
|
||||
/// Displays the passed size in bytes as 123.45 GiB.
|
||||
void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int precision = 2);
|
||||
std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2);
|
||||
|
||||
/// Displays the transmitted size in bytes as 132.55 GB.
|
||||
/// Displays the passed size in bytes as 132.55 GB.
|
||||
void formatReadableSizeWithDecimalSuffix(double value, DB::WriteBuffer & out, int precision = 2);
|
||||
std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2);
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the hostname utility with the -f flag.
|
||||
* If it does not work, return hostname - similar to calling hostname without flags or uname -n.
|
||||
/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag.
|
||||
* If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'.
|
||||
*/
|
||||
const std::string & getFQDNOrHostName();
|
||||
|
@ -16,7 +16,7 @@ namespace DB
|
||||
}
|
||||
|
||||
|
||||
/** Checks match of type by comparing typeid.
|
||||
/** Checks type by comparing typeid.
|
||||
* The exact match of the type is checked. That is, a cast to an ancestor type will be unsuccessful.
|
||||
* Otherwise, it behaves like a dynamic_cast.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user