Merge branch 'master' into joins

Author: chertus
Date: 2019-02-12 15:51:04 +03:00
Commit: aaf3813607
92 changed files with 2128 additions and 832 deletions

5 .gitmodules vendored

@ -63,7 +63,4 @@
url = https://github.com/ClickHouse-Extras/libgsasl.git
[submodule "contrib/cppkafka"]
path = contrib/cppkafka
url = https://github.com/mfontanini/cppkafka.git
[submodule "contrib/pdqsort"]
path = contrib/pdqsort
url = https://github.com/orlp/pdqsort
url = https://github.com/ClickHouse-Extras/cppkafka.git


@ -3,7 +3,6 @@ set -e -x
source default-config
# TODO Non debian systems
./install-os-packages.sh svn
./install-os-packages.sh cmake

2 contrib/cppkafka vendored

@ -1 +1 @@
Subproject commit 520465510efef7704346cf8d140967c4abb057c1
Subproject commit 860c90e92eee6690aa74a2ca7b7c5c6930dffecd

1 contrib/pdqsort vendored

@ -1 +0,0 @@
Subproject commit 08879029ab8dcb80a70142acb709e3df02de5d37

2 contrib/pdqsort/README Normal file

@ -0,0 +1,2 @@
Source from https://github.com/orlp/pdqsort
Mandatory for ClickHouse; it is not available in OS packages, so we can't use it as a submodule.


@ -0,0 +1,16 @@
Copyright (c) 2015 Orson Peters <orsonpeters@gmail.com>
This software is provided 'as-is', without any express or implied warranty. In no event will the
authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial
applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
original software. If you use this software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
being the original software.
3. This notice may not be removed or altered from any source distribution.

544 contrib/pdqsort/pdqsort.h Normal file

@ -0,0 +1,544 @@
/*
pdqsort.h - Pattern-defeating quicksort.
Copyright (c) 2015 Orson Peters
This software is provided 'as-is', without any express or implied warranty. In no event will the
authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial
applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
original software. If you use this software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef PDQSORT_H
#define PDQSORT_H
#include <algorithm>
#include <cstddef>
#include <functional>
#include <utility>
#include <iterator>
#if __cplusplus >= 201103L
#include <cstdint>
#include <type_traits>
#define PDQSORT_PREFER_MOVE(x) std::move(x)
#else
#define PDQSORT_PREFER_MOVE(x) (x)
#endif
namespace pdqsort_detail {
enum {
// Partitions below this size are sorted using insertion sort.
insertion_sort_threshold = 24,
// Partitions above this size use Tukey's ninther to select the pivot.
ninther_threshold = 128,
// When we detect an already sorted partition, attempt an insertion sort that allows this
// amount of element moves before giving up.
partial_insertion_sort_limit = 8,
// Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
block_size = 64,
// Cacheline size, assumes power of two.
cacheline_size = 64
};
#if __cplusplus >= 201103L
template<class T> struct is_default_compare : std::false_type { };
template<class T> struct is_default_compare<std::less<T>> : std::true_type { };
template<class T> struct is_default_compare<std::greater<T>> : std::true_type { };
#endif
// Returns floor(log2(n)), assumes n > 0.
template<class T>
inline int log2(T n) {
int log = 0;
while (n >>= 1) ++log;
return log;
}
// Sorts [begin, end) using insertion sort with the given comparison function.
template<class Iter, class Compare>
inline void insertion_sort(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
if (begin == end) return;
for (Iter cur = begin + 1; cur != end; ++cur) {
Iter sift = cur;
Iter sift_1 = cur - 1;
// Compare first so we can avoid 2 moves for an element already positioned correctly.
if (comp(*sift, *sift_1)) {
T tmp = PDQSORT_PREFER_MOVE(*sift);
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
while (sift != begin && comp(tmp, *--sift_1));
*sift = PDQSORT_PREFER_MOVE(tmp);
}
}
}
// Sorts [begin, end) using insertion sort with the given comparison function. Assumes
// *(begin - 1) is an element smaller than or equal to any element in [begin, end).
template<class Iter, class Compare>
inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
if (begin == end) return;
for (Iter cur = begin + 1; cur != end; ++cur) {
Iter sift = cur;
Iter sift_1 = cur - 1;
// Compare first so we can avoid 2 moves for an element already positioned correctly.
if (comp(*sift, *sift_1)) {
T tmp = PDQSORT_PREFER_MOVE(*sift);
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
while (comp(tmp, *--sift_1));
*sift = PDQSORT_PREFER_MOVE(tmp);
}
}
}
// Attempts to use insertion sort on [begin, end). Will return false if more than
// partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
// successfully sort and return true.
template<class Iter, class Compare>
inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
if (begin == end) return true;
int limit = 0;
for (Iter cur = begin + 1; cur != end; ++cur) {
if (limit > partial_insertion_sort_limit) return false;
Iter sift = cur;
Iter sift_1 = cur - 1;
// Compare first so we can avoid 2 moves for an element already positioned correctly.
if (comp(*sift, *sift_1)) {
T tmp = PDQSORT_PREFER_MOVE(*sift);
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
while (sift != begin && comp(tmp, *--sift_1));
*sift = PDQSORT_PREFER_MOVE(tmp);
limit += cur - sift;
}
}
return true;
}
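// Sorts the elements *a and *b so that *a is not greater than *b according to comp.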
template<class Iter, class Compare>
inline void sort2(Iter a, Iter b, Compare comp) {
if (comp(*b, *a)) std::iter_swap(a, b);
}
// Sorts the elements *a, *b and *c using comparison function comp.
template<class Iter, class Compare>
inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
sort2(a, b, comp);
sort2(b, c, comp);
sort2(a, b, comp);
}
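// Rounds the pointer p up to the next cacheline_size boundary.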
template<class T>
inline T* align_cacheline(T* p) {
#if defined(UINTPTR_MAX) && __cplusplus >= 201103L
std::uintptr_t ip = reinterpret_cast<std::uintptr_t>(p);
#else
std::size_t ip = reinterpret_cast<std::size_t>(p);
#endif
ip = (ip + cacheline_size - 1) & -cacheline_size;
return reinterpret_cast<T*>(ip);
}
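// Moves the num out-of-place elements addressed by offsets_l (relative to first) and offsets_r
// (relative to last, counting backwards) to the opposite side, either with pairwise swaps or
// with a single move-saving cyclic rotation.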
template<class Iter>
inline void swap_offsets(Iter first, Iter last,
unsigned char* offsets_l, unsigned char* offsets_r,
int num, bool use_swaps) {
typedef typename std::iterator_traits<Iter>::value_type T;
if (use_swaps) {
// This case is needed for the descending distribution, where we need
// to have proper swapping for pdqsort to remain O(n).
for (int i = 0; i < num; ++i) {
std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
}
} else if (num > 0) {
Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
for (int i = 1; i < num; ++i) {
l = first + offsets_l[i]; *r = PDQSORT_PREFER_MOVE(*l);
r = last - offsets_r[i]; *l = PDQSORT_PREFER_MOVE(*r);
}
*r = PDQSORT_PREFER_MOVE(tmp);
}
}
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
// pivot is a median of at least 3 elements and that [begin, end) is at least
// insertion_sort_threshold long. Uses branchless partitioning.
template<class Iter, class Compare>
inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
// Move pivot into local for speed.
T pivot(PDQSORT_PREFER_MOVE(*begin));
Iter first = begin;
Iter last = end;
// Find the first element greater than or equal to the pivot (the median of 3 guarantees
// this exists).
while (comp(*++first, pivot));
// Find the first element strictly smaller than the pivot. We have to guard this search if
// there was no element before *first.
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
else while ( !comp(*--last, pivot));
// If the first pair of elements that should be swapped to partition are the same element,
// the passed in sequence already was correctly partitioned.
bool already_partitioned = first >= last;
if (!already_partitioned) {
std::iter_swap(first, last);
++first;
}
// The following branchless partitioning is derived from "BlockQuicksort: How Branch
// Mispredictions don't affect Quicksort" by Stefan Edelkamp and Armin Weiss.
unsigned char offsets_l_storage[block_size + cacheline_size];
unsigned char offsets_r_storage[block_size + cacheline_size];
unsigned char* offsets_l = align_cacheline(offsets_l_storage);
unsigned char* offsets_r = align_cacheline(offsets_r_storage);
int num_l, num_r, start_l, start_r;
num_l = num_r = start_l = start_r = 0;
while (last - first > 2 * block_size) {
// Fill up offset blocks with elements that are on the wrong side.
if (num_l == 0) {
start_l = 0;
Iter it = first;
for (unsigned char i = 0; i < block_size;) {
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
}
}
if (num_r == 0) {
start_r = 0;
Iter it = last;
for (unsigned char i = 0; i < block_size;) {
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
}
}
// Swap elements and update block sizes and first/last boundaries.
int num = std::min(num_l, num_r);
swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r,
num, num_l == num_r);
num_l -= num; num_r -= num;
start_l += num; start_r += num;
if (num_l == 0) first += block_size;
if (num_r == 0) last -= block_size;
}
int l_size = 0, r_size = 0;
int unknown_left = (last - first) - ((num_r || num_l) ? block_size : 0);
if (num_r) {
// Handle leftover block by assigning the unknown elements to the other block.
l_size = unknown_left;
r_size = block_size;
} else if (num_l) {
l_size = block_size;
r_size = unknown_left;
} else {
// No leftover block, split the unknown elements in two blocks.
l_size = unknown_left/2;
r_size = unknown_left - l_size;
}
// Fill offset buffers if needed.
if (unknown_left && !num_l) {
start_l = 0;
Iter it = first;
for (unsigned char i = 0; i < l_size;) {
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
}
}
if (unknown_left && !num_r) {
start_r = 0;
Iter it = last;
for (unsigned char i = 0; i < r_size;) {
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
}
}
int num = std::min(num_l, num_r);
swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r, num, num_l == num_r);
num_l -= num; num_r -= num;
start_l += num; start_r += num;
if (num_l == 0) first += l_size;
if (num_r == 0) last -= r_size;
// We have now fully identified [first, last)'s proper position. Swap the last elements.
if (num_l) {
offsets_l += start_l;
while (num_l--) std::iter_swap(first + offsets_l[num_l], --last);
first = last;
}
if (num_r) {
offsets_r += start_r;
while (num_r--) std::iter_swap(last - offsets_r[num_r], first), ++first;
last = first;
}
// Put the pivot in the right place.
Iter pivot_pos = first - 1;
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
return std::make_pair(pivot_pos, already_partitioned);
}
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
// pivot is a median of at least 3 elements and that [begin, end) is at least
// insertion_sort_threshold long.
template<class Iter, class Compare>
inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
// Move pivot into local for speed.
T pivot(PDQSORT_PREFER_MOVE(*begin));
Iter first = begin;
Iter last = end;
// Find the first element greater than or equal to the pivot (the median of 3 guarantees
// this exists).
while (comp(*++first, pivot));
// Find the first element strictly smaller than the pivot. We have to guard this search if
// there was no element before *first.
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
else while ( !comp(*--last, pivot));
// If the first pair of elements that should be swapped to partition are the same element,
// the passed in sequence already was correctly partitioned.
bool already_partitioned = first >= last;
// Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
// swapped pairs guard the searches, which is why the first iteration is special-cased
// above.
while (first < last) {
std::iter_swap(first, last);
while (comp(*++first, pivot));
while (!comp(*--last, pivot));
}
// Put the pivot in the right place.
Iter pivot_pos = first - 1;
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
return std::make_pair(pivot_pos, already_partitioned);
}
// Similar function to the one above, except elements equal to the pivot are put to the left of
// the pivot and it doesn't check or return if the passed sequence already was partitioned.
// Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
// performance, no block quicksort is applied here for simplicity.
template<class Iter, class Compare>
inline Iter partition_left(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
T pivot(PDQSORT_PREFER_MOVE(*begin));
Iter first = begin;
Iter last = end;
while (comp(pivot, *--last));
if (last + 1 == end) while (first < last && !comp(pivot, *++first));
else while ( !comp(pivot, *++first));
while (first < last) {
std::iter_swap(first, last);
while (comp(pivot, *--last));
while (!comp(pivot, *++first));
}
Iter pivot_pos = last;
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
return pivot_pos;
}
template<class Iter, class Compare, bool Branchless>
inline void pdqsort_loop(Iter begin, Iter end, Compare comp, int bad_allowed, bool leftmost = true) {
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
// Use a while loop for tail recursion elimination.
while (true) {
diff_t size = end - begin;
// Insertion sort is faster for small arrays.
if (size < insertion_sort_threshold) {
if (leftmost) insertion_sort(begin, end, comp);
else unguarded_insertion_sort(begin, end, comp);
return;
}
// Choose pivot as median of 3 or pseudomedian of 9.
diff_t s2 = size / 2;
if (size > ninther_threshold) {
sort3(begin, begin + s2, end - 1, comp);
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
std::iter_swap(begin, begin + s2);
} else sort3(begin + s2, begin, end - 1, comp);
// If *(begin - 1) is the end of the right partition of a previous partition operation
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
// the left partition, greater elements in the right partition. We do not have to
// recurse on the left partition, since it's sorted (all equal).
if (!leftmost && !comp(*(begin - 1), *begin)) {
begin = partition_left(begin, end, comp) + 1;
continue;
}
// Partition and get results.
std::pair<Iter, bool> part_result =
Branchless ? partition_right_branchless(begin, end, comp)
: partition_right(begin, end, comp);
Iter pivot_pos = part_result.first;
bool already_partitioned = part_result.second;
// Check for a highly unbalanced partition.
diff_t l_size = pivot_pos - begin;
diff_t r_size = end - (pivot_pos + 1);
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
if (highly_unbalanced) {
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
if (--bad_allowed == 0) {
std::make_heap(begin, end, comp);
std::sort_heap(begin, end, comp);
return;
}
if (l_size >= insertion_sort_threshold) {
std::iter_swap(begin, begin + l_size / 4);
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
if (l_size > ninther_threshold) {
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
}
}
if (r_size >= insertion_sort_threshold) {
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
std::iter_swap(end - 1, end - r_size / 4);
if (r_size > ninther_threshold) {
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
std::iter_swap(end - 2, end - (1 + r_size / 4));
std::iter_swap(end - 3, end - (2 + r_size / 4));
}
}
} else {
// If we were decently balanced and we tried to sort an already partitioned
// sequence try to use insertion sort.
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
&& partial_insertion_sort(pivot_pos + 1, end, comp)) return;
}
// Sort the left partition first using recursion and do tail recursion elimination for
// the right-hand partition.
pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost);
begin = pivot_pos + 1;
leftmost = false;
}
}
}
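// Sorts [begin, end) in ascending order (according to comp) with pattern-defeating quicksort.
// In C++11 and later, the branchless partitioner is selected automatically when the value type
// is arithmetic and the comparator is std::less or std::greater.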
template<class Iter, class Compare>
inline void pdqsort(Iter begin, Iter end, Compare comp) {
if (begin == end) return;
#if __cplusplus >= 201103L
pdqsort_detail::pdqsort_loop<Iter, Compare,
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
begin, end, comp, pdqsort_detail::log2(end - begin));
#else
pdqsort_detail::pdqsort_loop<Iter, Compare, false>(
begin, end, comp, pdqsort_detail::log2(end - begin));
#endif
}
template<class Iter>
inline void pdqsort(Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type T;
pdqsort(begin, end, std::less<T>());
}
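// Same as pdqsort, but always uses the branchless partitioner. Intended for comparison functions
// that do not branch, e.g. plain integer or float comparisons.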
template<class Iter, class Compare>
inline void pdqsort_branchless(Iter begin, Iter end, Compare comp) {
if (begin == end) return;
pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
begin, end, comp, pdqsort_detail::log2(end - begin));
}
template<class Iter>
inline void pdqsort_branchless(Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type T;
pdqsort_branchless(begin, end, std::less<T>());
}
#undef PDQSORT_PREFER_MOVE
#endif

119 contrib/pdqsort/readme.md Normal file

@ -0,0 +1,119 @@
pdqsort
-------
Pattern-defeating quicksort (pdqsort) is a novel sorting algorithm that combines the fast average
case of randomized quicksort with the fast worst case of heapsort, while achieving linear time on
inputs with certain patterns. pdqsort is an extension and improvement of David Musser's introsort.
All code is available for free under the zlib license.
| Best | Average | Worst   | Memory | Stable | Deterministic |
|------|---------|---------|--------|--------|---------------|
| n    | n log n | n log n | log n  | No     | Yes           |
### Usage
`pdqsort` is a drop-in replacement for [`std::sort`](http://en.cppreference.com/w/cpp/algorithm/sort).
Just replace a call to `std::sort` with `pdqsort` to start using pattern-defeating quicksort. If your
comparison function is branchless, you can call `pdqsort_branchless` for a potentially large speedup. If
you are using C++11, the type you're sorting is arithmetic and your comparison function is not given
or is `std::less`/`std::greater`, `pdqsort` automatically delegates to `pdqsort_branchless`.
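For example, a minimal sketch of both entry points (illustrative only; it assumes `pdqsort.h` is on the include path, and the container contents are made up):

```cpp
#include <functional>
#include <string>
#include <vector>
#include "pdqsort.h"

int main() {
    std::vector<int> numbers = {5, 2, 9, 1, 5, 6};
    std::vector<std::string> words = {"pear", "apple", "plum"};

    // Arithmetic type with the default comparator: in C++11 this delegates to pdqsort_branchless.
    pdqsort(numbers.begin(), numbers.end());

    // Non-arithmetic type: regular pdqsort with std::less<std::string>.
    pdqsort(words.begin(), words.end());

    // Explicitly request branchless partitioning with a branchless comparator.
    pdqsort_branchless(numbers.begin(), numbers.end(), std::greater<int>());
}
```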
### Benchmark
A comparison of pdqsort and GCC's `std::sort` and `std::stable_sort` with various input
distributions:
![Performance graph](http://i.imgur.com/1RnIGBO.png)
Compiled with `-std=c++11 -O2 -m64 -march=native`.
### Visualization
A visualization of pattern-defeating quicksort sorting a ~200 element array with some duplicates.
Generated using Timo Bingmann's [The Sound of Sorting](http://panthema.net/2013/sound-of-sorting/)
program, a tool that has been invaluable during the development of pdqsort. For the purposes of
this visualization the cutoff point for insertion sort was lowered to 8 elements.
![Visualization](http://i.imgur.com/QzFG09F.gif)
### The best case
pdqsort is designed to run in linear time for a couple of best-case patterns. Linear time is
achieved for inputs that are in strictly ascending or descending order, only contain equal elements,
or are strictly in ascending order followed by one out-of-place element. There are two separate
mechanisms at play to achieve this.
For equal elements a smart partitioning scheme is used that always puts equal elements in the
partition containing elements greater than the pivot. When a new pivot is chosen it's compared to
the greatest element in the partition before it. If they compare equal we can derive that there are
no elements smaller than the chosen pivot. When this happens we switch strategy for this partition,
and filter out all elements equal to the pivot.
To get linear time for the other patterns we check after every partition if any swaps were made. If
no swaps were made and the partition was decently balanced we will optimistically attempt to use
insertion sort. This insertion sort aborts if more than a constant number of moves is required to
sort.
### The average case
On average case data where no patterns are detected pdqsort is effectively a quicksort that uses
median-of-3 pivot selection, switching to insertion sort if the number of elements to be
(recursively) sorted is small. The overhead associated with detecting the patterns for the best case
is so small it lies within the error of measurement.
pdqsort gets a great speedup over the traditional way of implementing quicksort when sorting large
arrays (1000+ elements). This is due to a new technique described in "BlockQuicksort: How Branch
Mispredictions don't affect Quicksort" by Stefan Edelkamp and Armin Weiss. In short, we bypass the
branch predictor by using small buffers (entirely in L1 cache) of the indices of elements that need
to be swapped. We fill these buffers in a branch-free way that's quite elegant (in pseudocode):
```cpp
buffer_num = 0; buffer_max_size = 64;
for (int i = 0; i < buffer_max_size; ++i) {
// With branch:
if (elements[i] < pivot) { buffer[buffer_num] = i; buffer_num++; }
// Without:
buffer[buffer_num] = i; buffer_num += (elements[i] < pivot);
}
```
This is only a speedup if the comparison function itself is branchless, however. By default pdqsort
will detect this if you're using C++11 or higher, the type you're sorting is arithmetic (e.g.
`int`), and you're using either `std::less` or `std::greater`. You can explicitly request branchless
partitioning by calling `pdqsort_branchless` instead of `pdqsort`.
### The worst case
Quicksort naturally performs badly on inputs that form patterns, because it is a partition-based
sort. Choosing a bad pivot will result in many comparisons that give little to no progress in the
sorting process. If the pattern does not get broken up, this can happen many times in a row. Worse,
real world data is filled with these patterns.
Traditionally the solution to this is to randomize the pivot selection of quicksort. While this
technically still allows for a quadratic worst case, the chances of it happening are astronomically
small. Later, in introsort, pivot selection is kept deterministic, instead switching to the
guaranteed O(n log n) heapsort if the recursion depth becomes too big. In pdqsort we adopt a hybrid
approach, (deterministically) shuffling some elements to break up patterns when we encounter a "bad"
partition. If we encounter too many "bad" partitions we switch to heapsort.
### Bad partitions
A bad partition occurs when the position of the pivot after partitioning is under the 12.5% (1/8th)
percentile or over the 87.5% percentile, i.e. the partition is highly unbalanced. When this happens we will
shuffle four elements at fixed locations for both partitions. This effectively breaks up many
patterns. If we encounter more than log(n) bad partitions we will switch to heapsort.
The 1/8th percentile is not chosen arbitrarily. An upper bound of quicksort's worst-case runtime can
be approximated within a constant factor by the following recurrence:
`T(n, p) = n + T(p(n-1), p) + T((1-p)(n-1), p)`
Where n is the number of elements, and p is the percentile of the pivot after partitioning.
`T(n, 1/2)` is the best case for quicksort. On modern systems heapsort is profiled to be
approximately 1.8 to 2 times as slow as quicksort. Choosing p such that `T(n, p) / T(n, 1/2) ~= 1.9`
as n gets big will ensure that we will only switch to heapsort if it would speed up the sorting.
p = 1/8 is a reasonably close value and is cheap to compute on every platform using a bitshift.
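As a rough check of that choice, here is a sketch assuming the standard asymptotic solution of the recurrence above, `T(n, p) ~ n ln(n) / H(p)` with `H(p) = -p ln p - (1-p) ln(1-p)`; the `cost_factor` helper is just for illustration:

```cpp
#include <cmath>
#include <cstdio>

// Asymptotic cost factor of T(n, p) = n + T(p(n-1), p) + T((1-p)(n-1), p): T(n, p) ~ n ln(n) / H(p).
static double cost_factor(double p) {
    return 1.0 / (-(p * std::log(p) + (1.0 - p) * std::log(1.0 - p)));
}

int main() {
    const double ratio = cost_factor(1.0 / 8.0) / cost_factor(1.0 / 2.0);
    // Prints ~1.84: a run of worst-tolerated (1/8) splits costs about as much extra as the
    // 1.8-2x slowdown of heapsort, so falling back to heapsort never loses much.
    std::printf("T(n, 1/8) / T(n, 1/2) ~= %.2f\n", ratio);
}
```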


@ -102,7 +102,9 @@ add_headers_and_sources(dbms src/Interpreters/ClusterProxy)
add_headers_and_sources(dbms src/Columns)
add_headers_and_sources(dbms src/Storages)
add_headers_and_sources(dbms src/Storages/Distributed)
add_headers_and_sources(dbms src/Storages/Kafka)
if(USE_RDKAFKA)
add_headers_and_sources(dbms src/Storages/Kafka)
endif()
add_headers_and_sources(dbms src/Storages/MergeTree)
add_headers_and_sources(dbms src/Client)
add_headers_and_sources(dbms src/Formats)


@ -1,11 +1,11 @@
# This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54414)
set(VERSION_REVISION 54415)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 2)
set(VERSION_MINOR 3)
set(VERSION_PATCH 0)
set(VERSION_GITHASH dcfca1355468a2d083b33c867effa8f79642ed6e)
set(VERSION_DESCRIBE v19.2.0-testing)
set(VERSION_STRING 19.2.0)
set(VERSION_GITHASH 1db4bd8c2a1a0cd610c8a6564e8194dca5265562)
set(VERSION_DESCRIBE v19.3.0-testing)
set(VERSION_STRING 19.3.0)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")


@ -724,7 +724,11 @@ private:
try
{
if (!processSingleQuery(str, ast) && !ignore_error)
auto ast_to_process = ast;
if (insert && insert->data)
ast_to_process = nullptr;
if (!processSingleQuery(str, ast_to_process) && !ignore_error)
return false;
}
catch (...)


@ -18,6 +18,32 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
namespace
{
void waitQuery(Connection & connection)
{
bool finished = false;
while (true)
{
if (!connection.poll(1000000))
continue;
Connection::Packet packet = connection.receivePacket();
switch (packet.type)
{
case Protocol::Server::EndOfStream:
finished = true;
break;
case Protocol::Server::Exception:
throw *packet.exception;
}
if (finished)
break;
}
}
}
namespace fs = boost::filesystem;
PerformanceTest::PerformanceTest(
@ -135,14 +161,18 @@ void PerformanceTest::prepare() const
{
for (const auto & query : test_info.create_queries)
{
LOG_INFO(log, "Executing create query '" << query << "'");
connection.sendQuery(query);
LOG_INFO(log, "Executing create query \"" << query << '\"');
connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
waitQuery(connection);
LOG_INFO(log, "Query finished");
}
for (const auto & query : test_info.fill_queries)
{
LOG_INFO(log, "Executing fill query '" << query << "'");
connection.sendQuery(query);
LOG_INFO(log, "Executing fill query \"" << query << '\"');
connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
waitQuery(connection);
LOG_INFO(log, "Query finished");
}
}
@ -151,8 +181,10 @@ void PerformanceTest::finish() const
{
for (const auto & query : test_info.drop_queries)
{
LOG_INFO(log, "Executing drop query '" << query << "'");
connection.sendQuery(query);
LOG_INFO(log, "Executing drop query \"" << query << '\"');
connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
waitQuery(connection);
LOG_INFO(log, "Query finished");
}
}
@ -208,7 +240,7 @@ void PerformanceTest::runQueries(
statistics.startWatches();
try
{
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context);
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
if (test_info.exec_type == ExecutionType::Loop)
{
@ -222,7 +254,7 @@ void PerformanceTest::runQueries(
break;
}
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context);
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
}
}
}


@ -170,11 +170,13 @@ private:
for (auto & test_config : tests_configurations)
{
auto [output, signal] = runTest(test_config);
if (lite_output)
std::cout << output;
else
outputs.push_back(output);
if (!output.empty())
{
if (lite_output)
std::cout << output;
else
outputs.push_back(output);
}
if (signal)
break;
}
@ -203,26 +205,32 @@ private:
LOG_INFO(log, "Config for test '" << info.test_name << "' parsed");
PerformanceTest current(test_config, connection, interrupt_listener, info, global_context, query_indexes[info.path]);
current.checkPreconditions();
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fulfilled");
LOG_INFO(log, "Preparing for run, have " << info.create_queries.size()
<< " create queries and " << info.fill_queries.size() << " fill queries");
current.prepare();
LOG_INFO(log, "Prepared");
LOG_INFO(log, "Running test '" << info.test_name << "'");
auto result = current.execute();
LOG_INFO(log, "Test '" << info.test_name << "' finished");
if (current.checkPreconditions())
{
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fulfilled");
LOG_INFO(
log,
"Preparing for run, have " << info.create_queries.size() << " create queries and " << info.fill_queries.size()
<< " fill queries");
current.prepare();
LOG_INFO(log, "Prepared");
LOG_INFO(log, "Running test '" << info.test_name << "'");
auto result = current.execute();
LOG_INFO(log, "Test '" << info.test_name << "' finished");
LOG_INFO(log, "Running post run queries");
current.finish();
LOG_INFO(log, "Postqueries finished");
if (lite_output)
return {report_builder->buildCompactReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
LOG_INFO(log, "Running post run queries");
current.finish();
LOG_INFO(log, "Postqueries finished");
if (lite_output)
return {report_builder->buildCompactReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
else
return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
}
else
return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
}
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are not fulfilled, skip run");
return {"", current.checkSIGINT()};
}
};
}


@ -44,14 +44,14 @@ void executeQuery(
TestStats & statistics,
TestStopConditions & stop_conditions,
InterruptListener & interrupt_listener,
Context & context)
Context & context,
const Settings & settings)
{
statistics.watch_per_query.restart();
statistics.last_query_was_cancelled = false;
statistics.last_query_rows_read = 0;
statistics.last_query_bytes_read = 0;
Settings settings;
RemoteBlockInputStream stream(connection, query, {}, context, &settings);
stream.setProgressCallback(


@ -4,6 +4,7 @@
#include "TestStopConditions.h"
#include <Common/InterruptListener.h>
#include <Interpreters/Context.h>
#include <Interpreters/Settings.h>
#include <Client/Connection.h>
namespace DB
@ -14,5 +15,6 @@ void executeQuery(
TestStats & statistics,
TestStopConditions & stop_conditions,
InterruptListener & interrupt_listener,
Context & context);
Context & context,
const Settings & settings);
}


@ -416,6 +416,7 @@ namespace ErrorCodes
extern const int CANNOT_SCHEDULE_TASK = 439;
extern const int INVALID_LIMIT_EXPRESSION = 440;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING = 441;
extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE = 442;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;


@ -130,9 +130,10 @@ private:
/**
* prompter for names, if a person makes a typo for some function or type, it
* helps to find best possible match (in particular, edit distance is one or two symbols)
* helps to find the best possible match (in particular, edit distance is computed like in clang:
* the max edit distance is (typo.size() + 2) / 3)
*/
NamePrompter</*MistakeFactor=*/2, /*MaxNumHints=*/2> prompter;
NamePrompter</*MaxNumHints=*/2> prompter;
};
}


@ -4,12 +4,13 @@
#include <algorithm>
#include <cctype>
#include <cmath>
#include <queue>
#include <utility>
namespace DB
{
template <size_t MistakeFactor, size_t MaxNumHints>
template <size_t MaxNumHints>
class NamePrompter
{
public:
@ -53,10 +54,18 @@ private:
static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
{
if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size())
const String & prompt = prompting_strings[ind];
/// Clang SimpleTypoCorrector logic
const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
const size_t mistake_factor = (name.size() + 2) / 3;
if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
return;
if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
{
size_t distance = levenshteinDistance(prompting_strings[ind], name);
if (distance <= MistakeFactor)
size_t distance = levenshteinDistance(prompt, name);
if (distance <= mistake_factor)
{
queue.emplace(distance, ind);
if (queue.size() > MaxNumHints)


@ -258,7 +258,7 @@ protected:
Block extremes;
void addChild(BlockInputStreamPtr & child)
void addChild(const BlockInputStreamPtr & child)
{
std::unique_lock lock(children_mutex);
children.push_back(child);


@ -17,7 +17,7 @@ namespace ErrorCodes
InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
const ASTPtr & ast, ReadBuffer & input_buffer_tail_part, const BlockIO & streams, Context & context)
const ASTPtr & ast, ReadBuffer * input_buffer_tail_part, const Block & header, const Context & context)
{
const ASTInsertQuery * ast_insert_query = dynamic_cast<const ASTInsertQuery *>(ast.get());
@ -36,7 +36,9 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
ConcatReadBuffer::ReadBuffers buffers;
if (ast_insert_query->data)
buffers.push_back(input_buffer_ast_part.get());
buffers.push_back(&input_buffer_tail_part);
if (input_buffer_tail_part)
buffers.push_back(input_buffer_tail_part);
/** NOTE Must not read from 'input_buffer_tail_part' before reading everything between 'ast_insert_query.data' and 'ast_insert_query.end'.
* - because 'query.data' could refer to memory piece, used as buffer for 'input_buffer_tail_part'.
@ -44,7 +46,7 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size);
res_stream = context.getInputFormat(format, *input_buffer_contacenated, header, context.getSettings().max_insert_block_size);
auto columns_description = ColumnsDescription::loadFromContext(context, ast_insert_query->database, ast_insert_query->table);
if (columns_description && !columns_description->defaults.empty())


@ -19,7 +19,7 @@ class Context;
class InputStreamFromASTInsertQuery : public IBlockInputStream
{
public:
InputStreamFromASTInsertQuery(const ASTPtr & ast, ReadBuffer & input_buffer_tail_part, const BlockIO & streams, Context & context);
InputStreamFromASTInsertQuery(const ASTPtr & ast, ReadBuffer * input_buffer_tail_part, const Block & header, const Context & context);
Block readImpl() override { return res_stream->read(); }
void readPrefixImpl() override { return res_stream->readPrefix(); }


@ -34,8 +34,7 @@ static inline bool typeIsSigned(const IDataType & type)
{
return typeIsEither<
DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64,
DataTypeFloat32, DataTypeFloat64,
DataTypeDate, DataTypeDateTime, DataTypeInterval
DataTypeFloat32, DataTypeFloat64, DataTypeInterval
>(type);
}


@ -12,8 +12,13 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_LARGE_ARRAY_SIZE;
}
/// Reasonable threshold.
static constexpr size_t max_arrays_size_in_block = 1000000000;
/* arrayWithConstant(num, const) - make array of constants with length num.
* arrayWithConstant(3, 'hello') = ['hello', 'hello', 'hello']
* arrayWithConstant(1, 'hello') = ['hello']
@ -55,6 +60,8 @@ public:
for (size_t i = 0; i < num_rows; ++i)
{
offset += col_num->getUInt(i);
if (unlikely(offset > max_arrays_size_in_block))
throw Exception("Too large array size while executing function " + getName(), ErrorCodes::TOO_LARGE_ARRAY_SIZE);
offsets.push_back(offset);
}


@ -24,6 +24,7 @@ void registerFunctionToStartOfFiveMinute(FunctionFactory &);
void registerFunctionToStartOfTenMinutes(FunctionFactory &);
void registerFunctionToStartOfFifteenMinutes(FunctionFactory &);
void registerFunctionToStartOfHour(FunctionFactory &);
void registerFunctionToStartOfInterval(FunctionFactory &);
void registerFunctionToStartOfISOYear(FunctionFactory &);
void registerFunctionToRelativeYearNum(FunctionFactory &);
void registerFunctionToRelativeQuarterNum(FunctionFactory &);
@ -86,6 +87,7 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToStartOfTenMinutes(factory);
registerFunctionToStartOfFifteenMinutes(factory);
registerFunctionToStartOfHour(factory);
registerFunctionToStartOfInterval(factory);
registerFunctionToStartOfISOYear(factory);
registerFunctionToRelativeYearNum(factory);
registerFunctionToRelativeQuarterNum(factory);


@ -0,0 +1,301 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeInterval.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
namespace
{
static constexpr auto function_name = "toStartOfInterval";
template <DataTypeInterval::Kind unit>
struct Transform;
template <>
struct Transform<DataTypeInterval::Year>
{
static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfYearInterval(DayNum(d), years);
}
static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
}
};
template <>
struct Transform<DataTypeInterval::Quarter>
{
static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfQuarterInterval(DayNum(d), quarters);
}
static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
}
};
template <>
struct Transform<DataTypeInterval::Month>
{
static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMonthInterval(DayNum(d), months);
}
static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
}
};
template <>
struct Transform<DataTypeInterval::Week>
{
static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfWeekInterval(DayNum(d), weeks);
}
static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
}
};
template <>
struct Transform<DataTypeInterval::Day>
{
static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfDayInterval(DayNum(d), days);
}
static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
}
};
template <>
struct Transform<DataTypeInterval::Hour>
{
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
};
template <>
struct Transform<DataTypeInterval::Minute>
{
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMinuteInterval(t, minutes);
}
};
template <>
struct Transform<DataTypeInterval::Second>
{
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfSecondInterval(t, seconds);
}
};
}
class FunctionToStartOfInterval : public IFunction
{
public:
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToStartOfInterval>(); }
static constexpr auto name = function_name;
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto check_date_time_argument = [&] {
if (!isDateOrDateTime(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
};
const DataTypeInterval * interval_type = nullptr;
auto check_interval_argument = [&] {
interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get());
if (!interval_type)
throw Exception(
"Illegal type " + arguments[1].type->getName() + " of argument of function " + getName()
+ ". Should be an interval of time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
};
auto check_timezone_argument = [&] {
if (!WhichDataType(arguments[2].type).isString())
throw Exception(
"Illegal type " + arguments[2].type->getName() + " of argument of function " + getName()
+ ". This argument is optional and must be a constant string with timezone name"
". This argument is allowed only when the 1st argument has the type DateTime",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
};
if (arguments.size() == 2)
{
check_date_time_argument();
check_interval_argument();
}
else if (arguments.size() == 3)
{
check_date_time_argument();
check_interval_argument();
check_timezone_argument();
}
else
{
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if ((interval_type->getKind() == DataTypeInterval::Second) || (interval_type->getKind() == DataTypeInterval::Minute)
|| (interval_type->getKind() == DataTypeInterval::Hour) || (interval_type->getKind() == DataTypeInterval::Day))
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
else
return std::make_shared<DataTypeDate>();
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /* input_rows_count */) override
{
const auto & time_column = block.getByPosition(arguments[0]);
const auto & interval_column = block.getByPosition(arguments[1]);
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
auto result_column = dispatchForColumns(time_column, interval_column, time_zone);
block.getByPosition(result).column = std::move(result_column);
}
bool hasInformationAboutMonotonicity() const override
{
return true;
}
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
{
return { true, true, true };
}
private:
ColumnPtr dispatchForColumns(
const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone)
{
if (WhichDataType(time_column.type.get()).isDateTime())
{
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(*time_column_vec, interval_column, time_zone);
}
if (WhichDataType(time_column.type.get()).isDate())
{
const auto * time_column_vec = checkAndGetColumn<ColumnUInt16>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(*time_column_vec, interval_column, time_zone);
}
throw Exception(
"Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
template <typename FromType>
ColumnPtr dispatchForIntervalColumn(
const ColumnVector<FromType> & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone)
{
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
if (!interval_type)
throw Exception(
"Illegal column for second argument of function " + getName() + ", must be an interval of time.",
ErrorCodes::ILLEGAL_COLUMN);
const auto * interval_column_const_int64 = checkAndGetColumnConst<ColumnInt64>(interval_column.column.get());
if (!interval_column_const_int64)
throw Exception(
"Illegal column for second argument of function " + getName() + ", must be a const interval of time.", ErrorCodes::ILLEGAL_COLUMN);
Int64 num_units = interval_column_const_int64->getValue<Int64>();
if (num_units <= 0)
throw Exception("Value for second argument of function " + getName() + " must be positive.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
switch (interval_type->getKind())
{
case DataTypeInterval::Second:
return execute<FromType, UInt32, DataTypeInterval::Second>(time_column, num_units, time_zone);
case DataTypeInterval::Minute:
return execute<FromType, UInt32, DataTypeInterval::Minute>(time_column, num_units, time_zone);
case DataTypeInterval::Hour:
return execute<FromType, UInt32, DataTypeInterval::Hour>(time_column, num_units, time_zone);
case DataTypeInterval::Day:
return execute<FromType, UInt32, DataTypeInterval::Day>(time_column, num_units, time_zone);
case DataTypeInterval::Week:
return execute<FromType, UInt16, DataTypeInterval::Week>(time_column, num_units, time_zone);
case DataTypeInterval::Month:
return execute<FromType, UInt16, DataTypeInterval::Month>(time_column, num_units, time_zone);
case DataTypeInterval::Quarter:
return execute<FromType, UInt16, DataTypeInterval::Quarter>(time_column, num_units, time_zone);
case DataTypeInterval::Year:
return execute<FromType, UInt16, DataTypeInterval::Year>(time_column, num_units, time_zone);
}
__builtin_unreachable();
}
template <typename FromType, typename ToType, DataTypeInterval::Kind unit>
ColumnPtr execute(const ColumnVector<FromType> & time_column, UInt64 num_units, const DateLUTImpl & time_zone)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto result = ColumnVector<ToType>::create();
auto & result_data = result->getData();
result_data.resize(size);
for (size_t i = 0; i != size; ++i)
result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone);
return result;
}
};
void registerFunctionToStartOfInterval(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfInterval>();
}
}


@ -0,0 +1,52 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <Common/typeid_cast.h>
namespace DB
{
/// Consistently reads from one sub-buffer in a circle, and delimits its output with a character.
/// Owns sub-buffer.
class DelimitedReadBuffer : public ReadBuffer
{
public:
DelimitedReadBuffer(ReadBuffer * buffer_, char delimiter_) : ReadBuffer(nullptr, 0), buffer(buffer_), delimiter(delimiter_)
{
// TODO: check that `buffer_` is not nullptr.
}
template <class BufferType>
BufferType * subBufferAs()
{
return typeid_cast<BufferType *>(buffer.get());
}
protected:
// XXX: don't know how to guarantee that the next call to this method is done after we read all previous data.
bool nextImpl() override
{
if (put_delimiter)
{
BufferBase::set(&delimiter, 1, 0);
put_delimiter = false;
}
else
{
if (!buffer->next())
return false;
BufferBase::set(buffer->position(), buffer->available(), 0);
put_delimiter = true;
}
return true;
}
private:
std::unique_ptr<ReadBuffer> buffer; // FIXME: should be `const`, but `ReadBuffer` doesn't allow
char delimiter; // FIXME: should be `const`, but `ReadBuffer` doesn't allow
bool put_delimiter = false;
};
} // namespace DB


@ -41,6 +41,7 @@ public:
*/
ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}
// FIXME: behavior differs greatly from `BufferBase::set()` and it's very confusing.
void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }
/** read next data and fill a buffer with it; set position to the beginning;


@ -812,7 +812,7 @@ void ExpressionActions::finalize(const Names & output_columns)
/// This has to be done before removing redundant actions and inserting REMOVE_COLUMNs
/// because inlining may change dependency sets.
if (settings.compile_expressions)
compileFunctions(actions, output_columns, sample_block, compilation_cache, settings.min_count_to_compile);
compileFunctions(actions, output_columns, sample_block, compilation_cache, settings.min_count_to_compile_expression);
#endif
/// Which columns are needed to perform actions from the current to the last.


@ -654,7 +654,7 @@ std::vector<std::unordered_set<std::optional<size_t>>> getActionsDependents(cons
return dependents;
}
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr<CompiledExpressionCache> compilation_cache, size_t min_count_to_compile)
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr<CompiledExpressionCache> compilation_cache, size_t min_count_to_compile_expression)
{
static std::unordered_map<UInt128, UInt32, UInt128Hash> counter;
static std::mutex mutex;
@ -688,7 +688,7 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output
auto hash_key = ExpressionActions::ActionsHash{}(fused[i]);
{
std::lock_guard lock(mutex);
if (counter[hash_key]++ < min_count_to_compile)
if (counter[hash_key]++ < min_count_to_compile_expression)
continue;
}


@ -73,7 +73,7 @@ public:
/// For each APPLY_FUNCTION action, try to compile the function to native code; if the only uses of a compilable
/// function's result are as arguments to other compilable functions, inline it and leave the now-redundant action as-is.
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr<CompiledExpressionCache> compilation_cache, size_t min_count_to_compile);
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr<CompiledExpressionCache> compilation_cache, size_t min_count_to_compile_expression);
}


@ -63,6 +63,7 @@ namespace ErrorCodes
extern const int DATABASE_ALREADY_EXISTS;
extern const int QUERY_IS_PROHIBITED;
extern const int THERE_IS_NO_DEFAULT_VALUE;
extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE;
}
@ -547,6 +548,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
return executeDDLQueryOnCluster(query_ptr, context, std::move(databases));
}
/// Temporary tables are created out of databases.
if (create.temporary && !create.database.empty())
throw Exception("Temporary tables cannot be inside a database. You should not specify a database for a temporary table.",
ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE);
String path = context.getPath();
String current_database = context.getCurrentDatabase();


@ -1,13 +1,17 @@
#include <IO/ConcatReadBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <Common/typeid_cast.h>
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <DataStreams/CountingBlockOutputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <DataStreams/ConvertingBlockInputStream.h>
#include <DataStreams/CountingBlockOutputStream.h>
#include <DataStreams/NullAndDoCopyBlockInputStream.h>
#include <DataStreams/PushingToViewsBlockOutputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/InputStreamFromASTInsertQuery.h>
#include <DataStreams/copyData.h>
#include <Parsers/ASTInsertQuery.h>
@ -106,11 +110,12 @@ BlockIO InterpreterInsertQuery::execute()
out = std::make_shared<SquashingBlockOutputStream>(
out, table->getSampleBlock(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
}
auto query_sample_block = getSampleBlock(query, table);
/// Actually we don't know structure of input blocks from query/table,
/// because some clients break insertion protocol (columns != header)
out = std::make_shared<AddingDefaultBlockOutputStream>(
out, getSampleBlock(query, table), table->getSampleBlock(), table->getColumns().defaults, context);
out, query_sample_block, table->getSampleBlock(), table->getColumns().defaults, context);
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
out_wrapper->setProcessListElement(context.getProcessListElement());
@ -140,6 +145,12 @@ BlockIO InterpreterInsertQuery::execute()
throw Exception("Cannot insert column " + name_type.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
}
}
else if (query.data && !query.has_tail) /// can execute without additional data
{
res.in = std::make_shared<InputStreamFromASTInsertQuery>(query_ptr, nullptr, query_sample_block, context);
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, res.out);
res.out = nullptr;
}
return res;
}


@ -77,6 +77,7 @@ struct Settings
M(SettingBool, compile, false, "Whether query compilation is enabled.") \
M(SettingBool, compile_expressions, true, "Compile some scalar functions and operators to native code.") \
M(SettingUInt64, min_count_to_compile, 3, "The number of structurally identical queries before they are compiled.") \
M(SettingUInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled") \
M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.") \
M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.") \
M(SettingBool, distributed_aggregation_memory_efficient, false, "Is the memory-saving mode of distributed aggregation enabled.") \
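A minimal usage sketch for the new setting (values are illustrative and mirror the updated functional tests further below): per the description, an identical expression becomes a JIT candidate only after it has been seen min_count_to_compile_expression times with compile_expressions enabled.
SET compile_expressions = 1;
SET min_count_to_compile_expression = 1;             -- compile after the first repetition, as in the tests
SELECT number * 2 + 1 FROM system.numbers LIMIT 10;  -- an arbitrary expression that can now be JIT-compiled on repeated runs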

View File

@ -141,7 +141,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
const char * end,
Context & context,
bool internal,
QueryProcessingStage::Enum stage)
QueryProcessingStage::Enum stage,
bool has_query_tail)
{
time_t current_time = time(nullptr);
@ -164,9 +165,12 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// TODO Parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size);
const auto * insert_query = dynamic_cast<const ASTInsertQuery *>(ast.get());
auto * insert_query = dynamic_cast<ASTInsertQuery *>(ast.get());
if (insert_query && insert_query->data)
{
query_end = insert_query->data;
insert_query->has_tail = has_query_tail;
}
else
query_end = end;
}
@ -434,7 +438,7 @@ BlockIO executeQuery(
QueryProcessingStage::Enum stage)
{
BlockIO streams;
std::tie(std::ignore, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage);
std::tie(std::ignore, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage, false);
return streams;
}
@ -479,13 +483,13 @@ void executeQuery(
ASTPtr ast;
BlockIO streams;
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete);
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, !istr.eof());
try
{
if (streams.out)
{
InputStreamFromASTInsertQuery in(ast, istr, streams, context);
InputStreamFromASTInsertQuery in(ast, &istr, streams.out->getHeader(), context);
copyData(in, *streams.out);
}

View File

@ -26,6 +26,9 @@ public:
const char * data = nullptr;
const char * end = nullptr;
/// Query has additional data, which will be sent later
bool has_tail = false;
/** Get the text that identifies this element. */
String getID(char delim) const override { return "InsertQuery" + (delim + database) + delim + table; }
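A hedged SQL illustration of the two cases this flag distinguishes (the table name is hypothetical): either all data is inlined in the query text, or more rows follow on the input stream and will be sent later.
INSERT INTO t_example VALUES (1), (2);  -- data is fully inline; the insert can run without additional input
INSERT INTO t_example FORMAT CSV        -- rows arrive on the input stream after the query text, which is the case has_tail marks
1,2
3,4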

View File

@ -0,0 +1,69 @@
#include <Storages/Kafka/KafkaBlockInputStream.h>
#include <Formats/FormatFactory.h>
#include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
namespace DB
{
KafkaBlockInputStream::KafkaBlockInputStream(
StorageKafka & storage_, const Context & context_, const String & schema, UInt64 max_block_size_)
: storage(storage_), context(context_), max_block_size(max_block_size_)
{
context.setSetting("input_format_skip_unknown_fields", 1u); // Always skip unknown fields regardless of the context (JSON or TSKV)
context.setSetting("input_format_allow_errors_ratio", 0.);
context.setSetting("input_format_allow_errors_num", storage.skip_broken);
if (!schema.empty())
context.setSetting("format_schema", schema);
}
KafkaBlockInputStream::~KafkaBlockInputStream()
{
if (!claimed)
return;
if (broken)
{
LOG_TRACE(storage.log, "Re-joining claimed consumer after failure");
consumer->unsubscribe();
}
storage.pushConsumer(consumer);
}
void KafkaBlockInputStream::readPrefixImpl()
{
consumer = storage.tryClaimConsumer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds());
claimed = !!consumer;
if (!consumer)
consumer = std::make_shared<cppkafka::Consumer>(storage.createConsumerConfiguration());
// While we wait for an assignment after subscription, we'll poll zero messages anyway.
// If we're doing a manual select, it's better to get something after a wait than nothing immediately.
if (consumer->get_subscription().empty())
{
using namespace std::chrono_literals;
consumer->pause(); // don't accidentally read any messages
consumer->subscribe(storage.topics);
consumer->poll(5s);
consumer->resume();
}
buffer = std::make_unique<DelimitedReadBuffer>(
new ReadBufferFromKafkaConsumer(consumer, storage.log, max_block_size), storage.row_delimiter);
addChild(FormatFactory::instance().getInput(storage.format_name, *buffer, storage.getSampleBlock(), context, max_block_size));
broken = true;
}
void KafkaBlockInputStream::readSuffixImpl()
{
buffer->subBufferAs<ReadBufferFromKafkaConsumer>()->commit();
broken = false;
}
} // namespace DB

View File

@ -0,0 +1,35 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <IO/DelimitedReadBuffer.h>
#include <Interpreters/Context.h>
#include <Storages/Kafka/StorageKafka.h>
namespace DB
{
class KafkaBlockInputStream : public IBlockInputStream
{
public:
KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_);
~KafkaBlockInputStream() override;
String getName() const override { return storage.getName(); }
Block readImpl() override { return children.back()->read(); }
Block getHeader() const override { return storage.getSampleBlock(); }
void readPrefixImpl() override;
void readSuffixImpl() override;
private:
StorageKafka & storage;
Context context;
UInt64 max_block_size;
ConsumerPtr consumer;
std::unique_ptr<DelimitedReadBuffer> buffer;
bool broken = true, claimed = false;
};
} // namespace DB

View File

@ -1,6 +1,3 @@
#include <Common/config.h>
#if USE_RDKAFKA
#include <Storages/Kafka/KafkaSettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <Common/Exception.h>
@ -41,4 +38,3 @@ void KafkaSettings::loadFromQuery(ASTStorage & storage_def)
}
}
#endif

View File

@ -1,6 +1,4 @@
#pragma once
#include <Common/config.h>
#if USE_RDKAFKA
#include <Poco/Util/AbstractConfiguration.h>
#include <Core/Defines.h>
@ -42,4 +40,3 @@ public:
};
}
#endif

View File

@ -0,0 +1,53 @@
#include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
namespace DB
{
namespace
{
const auto READ_POLL_MS = 500; /// How long to wait for a batch of messages.
} // namespace
void ReadBufferFromKafkaConsumer::commit()
{
if (messages.empty() || current == messages.begin())
return;
auto & previous = *std::prev(current);
LOG_TRACE(log, "Committing message with offset " << previous.get_offset());
consumer->async_commit(previous);
}
/// Commit messages implicitly after the previous batch has been processed.
bool ReadBufferFromKafkaConsumer::nextImpl()
{
if (current == messages.end())
{
commit();
messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(READ_POLL_MS));
current = messages.begin();
LOG_TRACE(log, "Polled batch of " << messages.size() << " messages");
}
if (messages.empty() || current == messages.end())
return false;
if (auto err = current->get_error())
{
++current;
// TODO: should throw exception instead
LOG_ERROR(log, "Consumer error: " << err);
return false;
}
// XXX: very fishy place with const casting.
auto new_position = reinterpret_cast<char *>(const_cast<unsigned char *>(current->get_payload().get_data()));
BufferBase::set(new_position, current->get_payload().get_size(), 0);
++current;
return true;
}
} // namespace DB

View File

@ -0,0 +1,36 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <common/logger_useful.h>
#include <cppkafka/cppkafka.h>
namespace DB
{
using ConsumerPtr = std::shared_ptr<cppkafka::Consumer>;
class ReadBufferFromKafkaConsumer : public ReadBuffer
{
public:
ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, size_t max_batch_size)
: ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), batch_size(max_batch_size), current(messages.begin())
{
}
// Commit all processed messages.
void commit();
private:
using Messages = std::vector<cppkafka::Message>;
ConsumerPtr consumer;
Poco::Logger * log;
const size_t batch_size = 1;
Messages messages;
Messages::const_iterator current;
bool nextImpl() override;
};
} // namespace DB

View File

@ -1,14 +1,9 @@
#include <Storages/Kafka/StorageKafka.h>
#if USE_RDKAFKA
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/LimitBlockInputStream.h>
#include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/copyData.h>
#include <Formats/FormatFactory.h>
#include <IO/ReadBuffer.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
@ -16,6 +11,7 @@
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Storages/Kafka/KafkaSettings.h>
#include <Storages/Kafka/KafkaBlockInputStream.h>
#include <Storages/StorageFactory.h>
#include <Storages/StorageMaterializedView.h>
#include <boost/algorithm/string/replace.hpp>
@ -42,210 +38,31 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TIMEOUT_EXCEEDED;
}
using namespace Poco::Util;
/// How long to wait for a single message (applies to each individual message)
static const auto READ_POLL_MS = 500;
static const auto CLEANUP_TIMEOUT_MS = 3000;
/// Configuration prefix
static const String CONFIG_PREFIX = "kafka";
class ReadBufferFromKafkaConsumer : public ReadBuffer
namespace
{
ConsumerPtr consumer;
cppkafka::Message current;
bool current_pending = false; /// We've fetched "current" message and need to process it on the next iteration.
Poco::Logger * log;
size_t read_messages = 0;
char row_delimiter;
const auto RESCHEDULE_MS = 500;
const auto CLEANUP_TIMEOUT_MS = 3000;
bool nextImpl() override
/// Configuration prefix
const String CONFIG_PREFIX = "kafka";
void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path)
{
if (current_pending)
Poco::Util::AbstractConfiguration::Keys keys;
std::vector<char> errstr(512);
config.keys(path, keys);
for (const auto & key : keys)
{
// XXX: very fishy place with const casting.
BufferBase::set(reinterpret_cast<char *>(const_cast<unsigned char *>(current.get_payload().get_data())), current.get_payload().get_size(), 0);
current_pending = false;
return true;
const String key_path = path + "." + key;
const String key_name = boost::replace_all_copy(key, "_", ".");
conf.set(key_name, config.getString(key_path));
}
// Process next buffered message
auto message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS));
if (!message)
return false;
if (message.is_eof())
{
// Reached EOF while reading current batch, skip it.
LOG_TRACE(log, "EOF reached for partition " << message.get_partition() << " offset " << message.get_offset());
return nextImpl();
}
else if (auto err = message.get_error())
{
LOG_ERROR(log, "Consumer error: " << err);
return false;
}
++read_messages;
// Now we've received a new message. Check if we need to produce a delimiter
if (row_delimiter != '\0' && current)
{
BufferBase::set(&row_delimiter, 1, 0);
current = std::move(message);
current_pending = true;
return true;
}
// Consume message and mark the topic/partition offset
// The offsets will be committed in the readSuffix() method after the block is completed
// If an exception is thrown before that would occur, the client will rejoin without committing offsets
current = std::move(message);
// XXX: very fishy place with const casting.
BufferBase::set(reinterpret_cast<char *>(const_cast<unsigned char *>(current.get_payload().get_data())), current.get_payload().get_size(), 0);
return true;
}
public:
ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, char row_delimiter_)
: ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), row_delimiter(row_delimiter_)
{
if (row_delimiter != '\0')
LOG_TRACE(log, "Row delimiter is: " << row_delimiter);
}
/// Commit messages read with this consumer
void commit()
{
LOG_TRACE(log, "Committing " << read_messages << " messages");
if (read_messages == 0)
return;
consumer->async_commit();
read_messages = 0;
}
};
class KafkaBlockInputStream : public IBlockInputStream
{
public:
KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, UInt64 max_block_size_)
: storage(storage_), context(context_), max_block_size(max_block_size_)
{
// Always skip unknown fields regardless of the context (JSON or TSKV)
context.setSetting("input_format_skip_unknown_fields", 1u);
// We don't use ratio since the number of Kafka messages may vary from stream to stream.
// Thus, ratio is meaningless.
context.setSetting("input_format_allow_errors_ratio", 1.);
context.setSetting("input_format_allow_errors_num", storage.skip_broken);
if (schema.size() > 0)
context.setSetting("format_schema", schema);
}
~KafkaBlockInputStream() override
{
if (!hasClaimed())
return;
// An error was thrown during the stream or it did not finish successfully
// The read offsets weren't committed, so consumer must rejoin the group from the original starting point
if (!finalized)
{
LOG_TRACE(storage.log, "KafkaBlockInputStream did not finish successfully, unsubscribing from assignments and rejoining");
consumer->unsubscribe();
consumer->subscribe(storage.topics);
}
// Return consumer for another reader
storage.pushConsumer(consumer);
consumer = nullptr;
}
String getName() const override
{
return storage.getName();
}
Block readImpl() override
{
if (isCancelledOrThrowIfKilled() || !hasClaimed())
return {};
if (!reader)
throw Exception("Logical error: reader is not initialized", ErrorCodes::LOGICAL_ERROR);
return reader->read();
}
Block getHeader() const override { return storage.getSampleBlock(); }
void readPrefixImpl() override
{
if (!hasClaimed())
{
// Create a formatted reader on Kafka messages
LOG_TRACE(storage.log, "Creating formatted reader");
consumer = storage.tryClaimConsumer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds());
if (consumer == nullptr)
throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED);
read_buf = std::make_unique<ReadBufferFromKafkaConsumer>(consumer, storage.log, storage.row_delimiter);
reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size);
}
// Start reading data
finalized = false;
reader->readPrefix();
}
void readSuffixImpl() override
{
if (hasClaimed())
{
reader->readSuffix();
// Store offsets read in this stream
read_buf->commit();
}
// Mark as successfully finished
finalized = true;
}
private:
StorageKafka & storage;
ConsumerPtr consumer;
Context context;
UInt64 max_block_size;
Block sample_block;
std::unique_ptr<ReadBufferFromKafkaConsumer> read_buf;
BlockInputStreamPtr reader;
bool finalized = false;
// Return true if consumer has been claimed by the stream
bool hasClaimed() { return consumer != nullptr; }
};
static void loadFromConfig(cppkafka::Configuration & conf, const AbstractConfiguration & config, const std::string & path)
{
AbstractConfiguration::Keys keys;
std::vector<char> errstr(512);
config.keys(path, keys);
for (const auto & key : keys)
{
const String key_path = path + "." + key;
const String key_name = boost::replace_all_copy(key, "_", ".");
conf.set(key_name, config.getString(key_path));
}
}
} // namespace
StorageKafka::StorageKafka(
const std::string & table_name_,
@ -358,9 +175,18 @@ cppkafka::Configuration StorageKafka::createConsumerConfiguration()
conf.set("client.id", VERSION_FULL);
// If no offset stored for this group, read all messages from the start
conf.set("auto.offset.reset", "smallest");
// We manually commit offsets after a stream successfully finished
conf.set("enable.auto.commit", "false");
// Ignore EOF messages
conf.set("enable.partition.eof", "false");
// for debug logs inside rdkafka
// conf.set("debug", "consumer,cgrp,topic,fetch");
// Update consumer configuration from the configuration
const auto & config = global_context.getConfigRef();
if (config.has(CONFIG_PREFIX))
@ -461,7 +287,7 @@ void StorageKafka::streamThread()
// Wait for attached views
if (!stream_cancelled)
task->scheduleAfter(READ_POLL_MS);
task->scheduleAfter(RESCHEDULE_MS);
}
@ -769,5 +595,3 @@ void registerStorageKafka(StorageFactory & factory)
}
#endif

View File

@ -1,9 +1,5 @@
#pragma once
#include <Common/config.h>
#if USE_RDKAFKA
#include <Core/BackgroundSchedulePool.h>
#include <Core/NamesAndTypes.h>
#include <DataStreams/IBlockOutputStream.h>
@ -107,5 +103,3 @@ protected:
};
}
#endif

View File

@ -15,9 +15,11 @@ const char * auto_contributors[] {
"Alexander Millin",
"Alexander Prudaev",
"Alexander Sapin",
"Alexander Tokmakov",
"Alexandr Krasheninnikov",
"Alexandr Orlov",
"Alexei Averchenko",
"Alexey",
"Alexey Arno",
"Alexey Milovidov",
"Alexey Tronov",
@ -45,6 +47,7 @@ const char * auto_contributors[] {
"Artemkin Pavel",
"Arthur Tokarchuk",
"Atri Sharma",
"BSD_Conqueror",
"Babacar Diassé",
"BayoNet",
"BlahGeek",
@ -61,6 +64,7 @@ const char * auto_contributors[] {
"Daniel Bershatsky",
"Daniel Dao",
"Danila Kutenin",
"DarkWanderer",
"Denis Burlaka",
"Denis Zhuravlev",
"Derek Perkins",
@ -79,6 +83,7 @@ const char * auto_contributors[] {
"Evgeniy Gatov",
"Evgeniy Udodov",
"Evgeny Konkov",
"Fadi Hadzh",
"Flowyi",
"Fruit of Eden",
"George",
@ -120,6 +125,7 @@ const char * auto_contributors[] {
"Liu Cong",
"LiuCong",
"LiuYangkuan",
"Lopatin Konstantin",
"Luis Bosque",
"Léo Ercolanelli",
"Maks Skorokhod",
@ -132,6 +138,7 @@ const char * auto_contributors[] {
"Max",
"Max Akhmedov",
"Max Vetrov",
"Maxim Akhmedov",
"Maxim Fedotov",
"Maxim Fridental",
"Maxim Khrisanfov",
@ -141,12 +148,14 @@ const char * auto_contributors[] {
"Michael Kolupaev",
"Michael Razuvaev",
"Michal Lisowski",
"Mihail Fandyushin",
"Mikhail Filimonov",
"Mikhail Salosin",
"Mikhail Surin",
"Mikhail f. Shiryaev",
"Milad Arabi",
"Narek Galstyan",
"NeZeD [Mac Pro]",
"Nicolae Vartolomei",
"Nikhil Raman",
"Nikita Vasilev",
@ -178,6 +187,7 @@ const char * auto_contributors[] {
"Sabyanin Maxim",
"SaltTan",
"Samuel Chou",
"Sergei Semin",
"Sergei Tsetlin (rekub)",
"Sergey Elantsev",
"Sergey Fedorov",
@ -191,6 +201,7 @@ const char * auto_contributors[] {
"Snow",
"Stanislav Pavlovichev",
"Stas Pavlovichev",
"Stupnikov Andrey",
"SuperBot",
"Tangaev",
"The-Alchemist",
@ -206,6 +217,7 @@ const char * auto_contributors[] {
"Veloman Yunkan",
"Veniamin Gvozdikov",
"Victor Tarnavsky",
"Vitaliy Karnienko",
"Vitaliy Lyudvichenko",
"Vitaly Baranov",
"Vitaly Samigullin",
@ -223,13 +235,16 @@ const char * auto_contributors[] {
"Yegor Andreenko",
"Yuri Dyachenko",
"Yurii Vlasenko",
"Yuriy Baranov",
"Yury Karpovich",
"Yury Stankevich",
"abdrakhmanov",
"abyss7",
"achulkov2",
"alesapin",
"alexander kozhikhov",
"alexey-milovidov",
"andrewsg",
"ap11",
"aprudaev",
"artpaul",
@ -272,6 +287,7 @@ const char * auto_contributors[] {
"moscas",
"nicelulu",
"ns-vasilev",
"objatie_groba",
"ogorbacheva",
"orantius",
"peshkurov",
@ -286,10 +302,12 @@ const char * auto_contributors[] {
"serebrserg",
"shangshujie",
"shedx",
"simon-says",
"stavrolia",
"sundy-li",
"sundyli",
"topvisor",
"urgordeadbeef",
"velom",
"vicdashkov",
"zamulla",

View File

@ -361,8 +361,11 @@ services:
cap_add:
- SYS_PTRACE
depends_on: {depends_on}
user: '{user}'
env_file:
- {env_file}
security_opt:
- label:disable
{networks}
{app_net}
{ipv4_address}
@ -669,6 +672,7 @@ class ClickHouseInstance:
db_dir=db_dir,
logs_dir=logs_dir,
depends_on=str(depends_on),
user=os.getuid(),
env_file=env_file,
odbc_ini_path=odbc_ini_path,
entrypoint_cmd=entrypoint_cmd,

View File

@ -8,6 +8,8 @@ services:
ZOO_MY_ID: 1
ZOO_PORT: 2181
ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
security_opt:
- label:disable
kafka1:
image: confluentinc/cp-kafka:4.1.0
@ -22,3 +24,5 @@ services:
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
depends_on:
- kafka_zookeeper
security_opt:
- label:disable

View File

@ -1,2 +1,3 @@
[pytest]
python_files = test.py
norecursedirs = _instances

View File

@ -9,25 +9,21 @@ import json
import subprocess
# TODO: add a test for run-time offset updates in ClickHouse when the offset is changed manually on the Kafka side.
# TODO: add a test that the materialized view works.
# TODO: add a test that SELECT with LIMIT works.
# TODO: modify tests to respect the `skip_broken_messages` setting.
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance',
main_configs=['configs/kafka.xml'],
with_kafka=True)
kafka_id = ''
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query('CREATE DATABASE test')
# Helpers
yield cluster
finally:
cluster.shutdown()
def kafka_is_available(kafka_id):
def check_kafka_is_available():
p = subprocess.Popen(('docker',
'exec',
'-i',
@ -36,11 +32,24 @@ def kafka_is_available(kafka_id):
'--bootstrap-server',
'PLAINTEXT://localhost:9092'),
stdout=subprocess.PIPE)
p.communicate()[0]
p.communicate()
return p.returncode == 0
def kafka_produce(kafka_id, topic, messages):
def wait_kafka_is_available(max_retries=50):
retries = 0
while True:
if check_kafka_is_available():
break
else:
retries += 1
if retries > max_retries:
raise "Kafka is not available"
print("Waiting for Kafka to start up")
time.sleep(1)
def kafka_produce(topic, messages):
p = subprocess.Popen(('docker',
'exec',
'-i',
@ -49,52 +58,165 @@ def kafka_produce(kafka_id, topic, messages):
'--broker-list',
'localhost:9092',
'--topic',
topic),
topic,
'--sync',
'--message-send-max-retries',
'100'),
stdin=subprocess.PIPE)
p.communicate(messages)
p.stdin.close()
print("Produced {} messages".format(len(messages.splitlines())))
def kafka_check_json_numbers(instance):
retries = 0
while True:
if kafka_is_available(instance.cluster.kafka_docker_id):
break
else:
retries += 1
if retries > 50:
raise 'Cannot connect to kafka.'
print("Waiting for kafka to be available...")
time.sleep(1)
messages = ''
for i in range(50):
messages += json.dumps({'key': i, 'value': i}) + '\n'
kafka_produce(instance.cluster.kafka_docker_id, 'json', messages)
for i in range(30):
result = instance.query('SELECT * FROM test.kafka;')
if result:
break
time.sleep(0.5)
# Since receiving messages from Kafka is asynchronous and flaky,
# we may want to check the results several times in a loop.
def kafka_check_result(result, check=False):
fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference')
with open(fpath) as reference:
assert TSV(result) == TSV(reference)
if check:
assert TSV(result) == TSV(reference)
else:
return TSV(result) == TSV(reference)
def test_kafka_json(started_cluster):
instance.query('''
DROP TABLE IF EXISTS test.kafka;
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka('kafka1:9092', 'json', 'json',
'JSONEachRow', '\\n');
''')
kafka_check_json_numbers(instance)
# Fixtures
@pytest.fixture(scope="module")
def kafka_cluster():
try:
global kafka_id
cluster.start()
kafka_id = instance.cluster.kafka_docker_id
print("kafka_id is {}".format(kafka_id))
instance.query('CREATE DATABASE test')
yield cluster
finally:
cluster.shutdown()
@pytest.fixture(autouse=True)
def kafka_setup_teardown():
instance.query('DROP TABLE IF EXISTS test.kafka')
wait_kafka_is_available()
print("kafka is available - running test")
yield # run test
instance.query('DROP TABLE test.kafka')
def test_kafka_json_settings(started_cluster):
# Tests
def test_kafka_settings_old_syntax(kafka_cluster):
instance.query('''
DROP TABLE IF EXISTS test.kafka;
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka('kafka1:9092', 'old', 'old', 'JSONEachRow', '\\n');
''')
# Don't insert malformed messages since old settings syntax
# doesn't support skipping of broken messages.
messages = ''
for i in range(50):
messages += json.dumps({'key': i, 'value': i}) + '\n'
kafka_produce('old', messages)
result = ''
for i in range(50):
result += instance.query('SELECT * FROM test.kafka')
if kafka_check_result(result):
break
kafka_check_result(result, True)
def test_kafka_settings_new_syntax(kafka_cluster):
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS
kafka_broker_list = 'kafka1:9092',
kafka_topic_list = 'new',
kafka_group_name = 'new',
kafka_format = 'JSONEachRow',
kafka_row_delimiter = '\\n',
kafka_skip_broken_messages = 1;
''')
messages = ''
for i in range(25):
messages += json.dumps({'key': i, 'value': i}) + '\n'
kafka_produce('new', messages)
# Insert couple of malformed messages.
kafka_produce('new', '}{very_broken_message,\n')
kafka_produce('new', '}another{very_broken_message,\n')
messages = ''
for i in range(25, 50):
messages += json.dumps({'key': i, 'value': i}) + '\n'
kafka_produce('new', messages)
result = ''
for i in range(50):
result += instance.query('SELECT * FROM test.kafka')
if kafka_check_result(result):
break
kafka_check_result(result, True)
def test_kafka_csv_with_delimiter(kafka_cluster):
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS
kafka_broker_list = 'kafka1:9092',
kafka_topic_list = 'csv',
kafka_group_name = 'csv',
kafka_format = 'CSV',
kafka_row_delimiter = '\\n';
''')
messages = ''
for i in range(50):
messages += '{i}, {i}\n'.format(i=i)
kafka_produce('csv', messages)
result = ''
for i in range(50):
result += instance.query('SELECT * FROM test.kafka')
if kafka_check_result(result):
break
kafka_check_result(result, True)
def test_kafka_tsv_with_delimiter(kafka_cluster):
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS
kafka_broker_list = 'kafka1:9092',
kafka_topic_list = 'tsv',
kafka_group_name = 'tsv',
kafka_format = 'TSV',
kafka_row_delimiter = '\\n';
''')
messages = ''
for i in range(50):
messages += '{i}\t{i}\n'.format(i=i)
kafka_produce('tsv', messages)
result = ''
for i in range(50):
result += instance.query('SELECT * FROM test.kafka')
if kafka_check_result(result):
break
kafka_check_result(result, True)
def test_kafka_materialized_view(kafka_cluster):
instance.query('''
DROP TABLE IF EXISTS test.view;
DROP TABLE IF EXISTS test.consumer;
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS
@ -103,9 +225,29 @@ def test_kafka_json_settings(started_cluster):
kafka_group_name = 'json',
kafka_format = 'JSONEachRow',
kafka_row_delimiter = '\\n';
''')
kafka_check_json_numbers(instance)
instance.query('DROP TABLE test.kafka')
CREATE TABLE test.view (key UInt64, value UInt64)
ENGINE = MergeTree()
ORDER BY key;
CREATE MATERIALIZED VIEW test.consumer TO test.view AS
SELECT * FROM test.kafka;
''')
messages = ''
for i in range(50):
messages += json.dumps({'key': i, 'value': i}) + '\n'
kafka_produce('json', messages)
for i in range(20):
time.sleep(1)
result = instance.query('SELECT * FROM test.view')
if kafka_check_result(result):
break
kafka_check_result(result, True)
instance.query('''
DROP TABLE test.consumer;
DROP TABLE test.view;
''')
if __name__ == '__main__':

View File

@ -38,9 +38,9 @@
-->
<query><![CDATA[select sum(UserID + 1 in (select UserID from hits_100m_single)) from hits_100m_single]]></query>
<query><![CDATA[select sum((UserID + 1, RegionID) in (select UserID, RegionID from hits_100m_single)) from hits_100m_single]]></query>
<query><![CDATA[select sum(URL in (select URL from hits_100m where URL != '')) from hits_100m_single]]></query>
<query><![CDATA[select sum(MobilePhoneModel in (select MobilePhoneModel from hits_1000m where MobilePhoneModel != '')) from hits_1000m_single]]></query>
<query><![CDATA[select sum((MobilePhoneModel, UserID + 1) in (select MobilePhoneModel, UserID from hits_1000m where MobilePhoneModel != '')) from hits_1000m_single]]></query>
<query><![CDATA[select sum(URL in (select URL from hits_100m_single where URL != '')) from hits_100m_single]]></query>
<query><![CDATA[select sum(MobilePhoneModel in (select MobilePhoneModel from hits_1000m_single where MobilePhoneModel != '')) from hits_1000m_single]]></query>
<query><![CDATA[select sum((MobilePhoneModel, UserID + 1) in (select MobilePhoneModel, UserID from hits_1000m_single where MobilePhoneModel != '')) from hits_1000m_single]]></query>
<main_metric>
<min_time/>

View File

@ -76,7 +76,6 @@
<value>toISOWeek</value>
<value>toISOYear</value>
<value>toStartOfDay</value>
<value>toDate</value>
<value>toMonday</value>
<value>toStartOfMonth</value>

View File

@ -1,152 +1,44 @@
<test>
<name>IPv4 Functions</name>
<type>once</type>
<tags>
</tags>
<type>loop</type>
<stop_conditions>
<all_of>
<total_time_ms>30000</total_time_ms>
</all_of>
<any_of>
<average_speed_not_changing_for_ms>2000</average_speed_not_changing_for_ms>
<total_time_ms>10000</total_time_ms>
<min_time_not_changing_for_ms>5000</min_time_not_changing_for_ms>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
<metrics>
<max_rows_per_second />
<max_bytes_per_second />
<avg_rows_per_second />
<avg_bytes_per_second />
</metrics>
<create_query>CREATE TABLE IF NOT EXISTS ips_v4(ip String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple()</create_query>
<!-- The CAIDA UCSD IPv4 Routed /24 DNS Names Dataset 20171130,
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
Randomly selected entries from first 50000 rows of dataset. -->
<substitutions>
<substitution>
<name>ipv4_string</name>
<values>
<!-- The CAIDA UCSD IPv4 Routed /24 DNS Names Dataset 20171130,
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
Randomly selected entries from first 50000 rows of dataset. -->
<value>116.253.40.133</value>
<value>183.247.232.58</value>
<value>116.106.34.242</value>
<value>111.56.27.171</value>
<value>183.245.137.140</value>
<value>183.212.25.70</value>
<value>162.144.2.57</value>
<value>111.4.229.190</value>
<value>59.52.3.168</value>
<value>115.11.21.200</value>
<value>121.28.97.113</value>
<value>111.46.39.248</value>
<value>120.192.122.34</value>
<value>113.56.44.105</value>
<value>116.66.238.92</value>
<value>67.22.254.206</value>
<value>115.0.24.191</value>
<value>182.30.107.86</value>
<value>223.73.153.243</value>
<value>115.159.103.38</value>
<value>36.186.75.121</value>
<value>111.56.188.125</value>
<value>115.14.93.25</value>
<value>211.97.110.141</value>
<value>61.58.96.173</value>
<value>203.126.212.37</value>
<value>192.220.125.142</value>
<value>115.22.20.223</value>
<value>121.25.160.80</value>
<value>117.150.98.199</value>
<value>183.211.172.143</value>
<value>180.244.18.143</value>
<value>209.131.3.252</value>
<value>220.200.1.22</value>
<value>171.225.130.45</value>
<value>115.4.78.200</value>
<value>36.183.59.29</value>
<value>218.42.159.17</value>
<value>115.13.39.164</value>
<value>142.254.161.133</value>
<value>116.2.211.43</value>
<value>36.183.126.25</value>
<value>66.150.171.196</value>
<value>104.149.148.137</value>
<value>120.239.82.212</value>
<value>111.14.182.156</value>
<value>115.6.63.224</value>
<value>153.35.83.233</value>
<value>113.142.1.1</value>
<value>121.25.82.29</value>
<value>62.151.203.189</value>
<value>104.27.46.146</value>
<value>36.189.46.88</value>
<value>116.252.54.207</value>
<value>64.77.240.1</value>
<value>142.252.102.78</value>
<value>36.82.224.170</value>
<value>117.33.191.217</value>
<value>144.12.164.251</value>
<value>122.10.93.66</value>
<value>104.25.84.59</value>
<value>111.4.242.106</value>
<value>222.216.51.186</value>
<value>112.33.13.212</value>
<value>115.9.240.116</value>
<value>171.228.0.153</value>
<value>45.3.47.158</value>
<value>69.57.193.230</value>
<value>115.6.104.199</value>
<value>104.24.237.140</value>
<value>199.17.84.108</value>
<value>120.193.17.57</value>
<value>112.40.38.145</value>
<value>67.55.90.43</value>
<value>180.253.57.249</value>
<value>14.204.253.158</value>
<value>1.83.241.116</value>
<value>202.198.37.147</value>
<value>115.6.31.95</value>
<value>117.32.14.179</value>
<value>23.238.237.26</value>
<value>116.97.76.104</value>
<value>1.80.2.248</value>
<value>59.50.185.152</value>
<value>42.117.228.166</value>
<value>119.36.22.147</value>
<value>210.66.18.184</value>
<value>115.19.192.159</value>
<value>112.15.128.113</value>
<value>1.55.138.211</value>
<value>210.183.19.113</value>
<value>42.115.43.114</value>
<value>58.16.171.31</value>
<value>171.234.78.185</value>
<value>113.56.43.134</value>
<value>111.53.182.225</value>
<value>107.160.215.141</value>
<value>171.229.231.90</value>
<value>58.19.84.138</value>
<value>36.79.88.107</value>
<fill_query> INSERT INTO ips_v4 VALUES ('116.253.40.133')('183.247.232.58')('116.106.34.242')('111.56.27.171')('183.245.137.140')('183.212.25.70')('162.144.2.57')('111.4.229.190')('59.52.3.168')('115.11.21.200')('121.28.97.113')('111.46.39.248')('120.192.122.34')('113.56.44.105')('116.66.238.92')('67.22.254.206')('115.0.24.191')('182.30.107.86')('223.73.153.243')('115.159.103.38')('36.186.75.121')('111.56.188.125')('115.14.93.25')('211.97.110.141')('61.58.96.173')('203.126.212.37')('192.220.125.142')('115.22.20.223')('121.25.160.80')('117.150.98.199')('183.211.172.143')('180.244.18.143')('209.131.3.252')('220.200.1.22')('171.225.130.45')('115.4.78.200')('36.183.59.29')('218.42.159.17')('115.13.39.164')('142.254.161.133')('116.2.211.43')('36.183.126.25')('66.150.171.196')('104.149.148.137')('120.239.82.212')('111.14.182.156')('115.6.63.224')('153.35.83.233')('113.142.1.1')('121.25.82.29')('62.151.203.189')('104.27.46.146')('36.189.46.88')('116.252.54.207')('64.77.240.1')('142.252.102.78')('36.82.224.170')('117.33.191.217')('144.12.164.251')('122.10.93.66')('104.25.84.59')('111.4.242.106')('222.216.51.186')('112.33.13.212')('115.9.240.116')('171.228.0.153')('45.3.47.158')('69.57.193.230')('115.6.104.199')('104.24.237.140')('199.17.84.108')('120.193.17.57')('112.40.38.145')('67.55.90.43')('180.253.57.249')('14.204.253.158')('1.83.241.116')('202.198.37.147')('115.6.31.95')('117.32.14.179')('23.238.237.26')('116.97.76.104')('1.80.2.248')('59.50.185.152')('42.117.228.166')('119.36.22.147')('210.66.18.184')('115.19.192.159')('112.15.128.113')('1.55.138.211')('210.183.19.113')('42.115.43.114')('58.16.171.31')('171.234.78.185')('113.56.43.134')('111.53.182.225')('107.160.215.141')('171.229.231.90')('58.19.84.138')('36.79.88.107')</fill_query>
<!-- invalid values -->
<value tag="error"></value>
<value tag="error"> </value>
<value tag="error">1</value>
<value tag="error">1.</value>
<value tag="error">1.2.</value>
<value tag="error">.2.</value>
<value tag="error">abc</value>
<value tag="error">127.0.0.1/24</value>
<value tag="error"> 127.0.0.1</value>
<value tag="error">127.0.0.1 </value>
<value tag="error">127.0.0.1?</value>
<value tag="error">999.999.999.999</value>
</values>
</substitution>
</substitutions>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<query tag='IPv4StringToNum'>SELECT count() FROM system.numbers WHERE NOT ignore(IPv4StringToNum(materialize('{ipv4_string}')))</query>
<query tag='IPv4NumToString+IPv4StringToNum'>SELECT count() FROM system.numbers WHERE NOT ignore(IPv4NumToString(IPv4StringToNum(materialize('{ipv4_string}'))))</query>
<query tag='IPv4NumToStringClassC+IPv4StringToNum'>SELECT count() FROM system.numbers WHERE NOT ignore(IPv4NumToStringClassC(IPv4StringToNum(materialize('{ipv4_string}'))))</query>
<query tag='IPv4ToIPv6+IPv4StringToNum'>SELECT count() FROM system.numbers WHERE NOT ignore(IPv4ToIPv6(IPv4StringToNum(materialize('{ipv4_string}'))))</query>
</test>
<query tag='IPv4StringToNum'>SELECT count() FROM ips_v4 WHERE NOT ignore(IPv4StringToNum(materialize(ip))) SETTINGS max_threads=1</query>
<query tag='IPv4NumToString+IPv4StringToNum'>SELECT count() FROM ips_v4 WHERE NOT ignore(IPv4NumToString(IPv4StringToNum(materialize(ip)))) SETTINGS max_threads=1</query>
<query tag='IPv4NumToStringClassC+IPv4StringToNum'>SELECT count() FROM ips_v4 WHERE NOT ignore(IPv4NumToStringClassC(IPv4StringToNum(materialize(ip)))) SETTINGS max_threads=1</query>
<query tag='IPv4ToIPv6+IPv4StringToNum'>SELECT count() FROM ips_v4 WHERE NOT ignore(IPv4ToIPv6(IPv4StringToNum(materialize(ip)))) SETTINGS max_threads=1</query>
<drop_query>DROP TABLE IF EXISTS ips_v4</drop_query>
</test>
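As a rough check of the data volume produced by the fill queries above (assuming ~100 seed addresses): each self-insert doubles the table, so 13 doublings give about 100 * 2^13 = 819200 rows.
SELECT count() FROM ips_v4;  -- expected to be on the order of 819200 after the fill queries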

View File

@ -1,257 +1,42 @@
<test>
<name>IPv6 Functions</name>
<type>once</type>
<tags>
</tags>
<type>loop</type>
<stop_conditions>
<all_of>
<total_time_ms>30000</total_time_ms>
</all_of>
<any_of>
<average_speed_not_changing_for_ms>2000</average_speed_not_changing_for_ms>
<total_time_ms>10000</total_time_ms>
<min_time_not_changing_for_ms>5000</min_time_not_changing_for_ms>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
<metrics>
<max_rows_per_second />
<max_bytes_per_second />
<avg_rows_per_second />
<avg_bytes_per_second />
</metrics>
<!-- The CAIDA UCSD IPv4 Routed /24 DNS Names Dataset - 20181130,
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
Randomly selected entries from first 50000 rows of dataset. -->
<substitutions>
<substitution>
<name>ipv6_string</name>
<values>
<!-- The CAIDA UCSD IPv4 Routed /24 DNS Names Dataset - 20181130,
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
Randomly selected entries from first 50000 rows of dataset. -->
<value>2606:2b00::1</value>
<value>2001:2000:3080:1351::2</value>
<value>2a01:8840:16::1</value>
<value>2001:550:0:1000::9a36:2a61</value>
<value>2001:578:400:4:2000::19</value>
<value>2607:f290::1</value>
<value>2a02:23f0:ffff:8::5</value>
<value>2400:c700:0:158::</value>
<value>2001:67c:24e4:4::250</value>
<value>2a02:2a38:37:5::2</value>
<value>2001:41a8:400:2::13a</value>
<value>2405:9800:9800:66::2</value>
<value>2a07:a343:f210::1</value>
<value>2403:5000:171:46::2</value>
<value>2800:c20:1141::8</value>
<value>2402:7800:40:2::62</value>
<value>2a00:de00::1</value>
<value>2001:688:0:2:1::9e</value>
<value>2001:2000:3080:80::2</value>
<value>2001:428::205:171:200:230</value>
<value>2001:fb1:fe0:9::8</value>
<value>2001:2000:3080:10ca::2</value>
<value>2400:dd0b:1003::2</value>
<value>2001:1a98:6677::9d9d:140a</value>
<value>2001:2000:3018:3b::1</value>
<value>2607:fa70:3:33::2</value>
<value>2001:5b0:23ff:fffa::113</value>
<value>2001:450:2001:1000:0:40:6924:23</value>
<value>2001:418:0:5000::c2d</value>
<value>2a01:b740:a09::1</value>
<value>2607:f0d0:2:2::243</value>
<value>2a01:348::e:1:1</value>
<value>2405:4800::3221:3621:2</value>
<value>2a02:aa08:e000:3100::2</value>
<value>2001:44c8:129:2632:33:0:252:2</value>
<value>2a02:e980:1e::1</value>
<value>2a0a:6f40:2::1</value>
<value>2001:550:2:29::2c9:1</value>
<value>2001:c20:4800::175</value>
<value>2c0f:feb0:1:2::d1</value>
<value>2a0b:7086:fff0::1</value>
<value>2a04:2dc0::16d</value>
<value>2604:7e00::105d</value>
<value>2001:470:1:946::2</value>
<value>2a0c:3240::1</value>
<value>2800:630:4010:8::2</value>
<value>2001:1af8:4040::12</value>
<value>2c0f:fc98:1200::2</value>
<value>2001:470:1:248::2</value>
<value>2620:44:a000::1</value>
<value>2402:800:63ff:40::1</value>
<value>2a02:b000:fff::524</value>
<value>2001:470:0:327::1</value>
<value>2401:7400:8888:2::8</value>
<value>2001:500:55::1</value>
<value>2001:668:0:3::f000:c2</value>
<value>2400:bf40::1</value>
<value>2001:67c:754::1</value>
<value>2402:28c0:100:ffff:ffff:ffff:ffff:ffff</value>
<value>2001:470:0:1fa::2</value>
<value>2001:550:0:1000::9a18:292a</value>
<value>2001:470:1:89e::2</value>
<value>2001:579:6f05:500:9934:5b3e:b7fe:1447</value>
<value>2804:158c::1</value>
<value>2600:140e:6::1</value>
<value>2a00:18e0:0:bb04::82</value>
<value>2a02:2698:5000::1e06</value>
<value>2402:800:63ff:10::7:2</value>
<value>2a02:e980:19::1</value>
<value>2001:4888::342:1:0:0</value>
<value>2607:fc68:0:4:0:2:2711:21</value>
<value>2606:2800:602a::1</value>
<value>2404:c600:1000:2::1d1</value>
<value>2001:578:1400:4::9d</value>
<value>2804:64:0:25::1</value>
<value>2605:3e00::1:2:2</value>
<value>2c0f:fa18:0:4::b</value>
<value>2606:2800:602c:b::d004</value>
<value>2610:18:181:4000::66</value>
<value>2001:48f8:1000:1::16</value>
<value>2408:8000:c000::1</value>
<value>2a03:4200:441:2::4e</value>
<value>2400:dd00:1:200a::2</value>
<value>2a02:e980:83:5b09:ecb8:c669:b336:650e</value>
<value>2001:16a0:2:200a::2</value>
<value>2001:4888:1f:e891:161:26::</value>
<value>2a0c:f743::1</value>
<value>2a02:e980:b::1</value>
<value>2001:578:201:1::601:9</value>
<value>2001:438:ffff::407d:1bc1</value>
<value>2001:920:1833::1</value>
<value>2001:1b70:a1:610::b102:2</value>
<value>2001:13c7:6014::1</value>
<value>2003:0:1203:4001::1</value>
<value>2804:a8:2:c8::d6</value>
<value>2a02:2e00:2080:f000:0:261:1:11</value>
<value>2001:578:20::d</value>
<value>2001:550:2:48::34:1</value>
<value>2a03:9d40:fe00:5::</value>
<value>2403:e800:200:102::2</value>
<create_query>CREATE TABLE IF NOT EXISTS ips_v6(ip String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple()</create_query>
<!-- The CAIDA UCSD IPv4 Routed /24 DNS Names Dataset 20171130,
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
Randomly selected entries from first 50000 rows of dataset.
IPv4 addresses from dataset are represented in IPv6 form. -->
<value tag="mapped">::ffff:116.253.40.133</value>
<value tag="mapped">::ffff:183.247.232.58</value>
<value tag="mapped">::ffff:116.106.34.242</value>
<value tag="mapped">::ffff:111.56.27.171</value>
<value tag="mapped">::ffff:183.245.137.140</value>
<value tag="mapped">::ffff:183.212.25.70</value>
<value tag="mapped">::ffff:162.144.2.57</value>
<value tag="mapped">::ffff:111.4.229.190</value>
<value tag="mapped">::ffff:59.52.3.168</value>
<value tag="mapped">::ffff:115.11.21.200</value>
<value tag="mapped">::ffff:121.28.97.113</value>
<value tag="mapped">::ffff:111.46.39.248</value>
<value tag="mapped">::ffff:120.192.122.34</value>
<value tag="mapped">::ffff:113.56.44.105</value>
<value tag="mapped">::ffff:116.66.238.92</value>
<value tag="mapped">::ffff:67.22.254.206</value>
<value tag="mapped">::ffff:115.0.24.191</value>
<value tag="mapped">::ffff:182.30.107.86</value>
<value tag="mapped">::ffff:223.73.153.243</value>
<value tag="mapped">::ffff:115.159.103.38</value>
<value tag="mapped">::ffff:36.186.75.121</value>
<value tag="mapped">::ffff:111.56.188.125</value>
<value tag="mapped">::ffff:115.14.93.25</value>
<value tag="mapped">::ffff:211.97.110.141</value>
<value tag="mapped">::ffff:61.58.96.173</value>
<value tag="mapped">::ffff:203.126.212.37</value>
<value tag="mapped">::ffff:192.220.125.142</value>
<value tag="mapped">::ffff:115.22.20.223</value>
<value tag="mapped">::ffff:121.25.160.80</value>
<value tag="mapped">::ffff:117.150.98.199</value>
<value tag="mapped">::ffff:183.211.172.143</value>
<value tag="mapped">::ffff:180.244.18.143</value>
<value tag="mapped">::ffff:209.131.3.252</value>
<value tag="mapped">::ffff:220.200.1.22</value>
<value tag="mapped">::ffff:171.225.130.45</value>
<value tag="mapped">::ffff:115.4.78.200</value>
<value tag="mapped">::ffff:36.183.59.29</value>
<value tag="mapped">::ffff:218.42.159.17</value>
<value tag="mapped">::ffff:115.13.39.164</value>
<value tag="mapped">::ffff:142.254.161.133</value>
<value tag="mapped">::ffff:116.2.211.43</value>
<value tag="mapped">::ffff:36.183.126.25</value>
<value tag="mapped">::ffff:66.150.171.196</value>
<value tag="mapped">::ffff:104.149.148.137</value>
<value tag="mapped">::ffff:120.239.82.212</value>
<value tag="mapped">::ffff:111.14.182.156</value>
<value tag="mapped">::ffff:115.6.63.224</value>
<value tag="mapped">::ffff:153.35.83.233</value>
<value tag="mapped">::ffff:113.142.1.1</value>
<value tag="mapped">::ffff:121.25.82.29</value>
<value tag="mapped">::ffff:62.151.203.189</value>
<value tag="mapped">::ffff:104.27.46.146</value>
<value tag="mapped">::ffff:36.189.46.88</value>
<value tag="mapped">::ffff:116.252.54.207</value>
<value tag="mapped">::ffff:64.77.240.1</value>
<value tag="mapped">::ffff:142.252.102.78</value>
<value tag="mapped">::ffff:36.82.224.170</value>
<value tag="mapped">::ffff:117.33.191.217</value>
<value tag="mapped">::ffff:144.12.164.251</value>
<value tag="mapped">::ffff:122.10.93.66</value>
<value tag="mapped">::ffff:104.25.84.59</value>
<value tag="mapped">::ffff:111.4.242.106</value>
<value tag="mapped">::ffff:222.216.51.186</value>
<value tag="mapped">::ffff:112.33.13.212</value>
<value tag="mapped">::ffff:115.9.240.116</value>
<value tag="mapped">::ffff:171.228.0.153</value>
<value tag="mapped">::ffff:45.3.47.158</value>
<value tag="mapped">::ffff:69.57.193.230</value>
<value tag="mapped">::ffff:115.6.104.199</value>
<value tag="mapped">::ffff:104.24.237.140</value>
<value tag="mapped">::ffff:199.17.84.108</value>
<value tag="mapped">::ffff:120.193.17.57</value>
<value tag="mapped">::ffff:112.40.38.145</value>
<value tag="mapped">::ffff:67.55.90.43</value>
<value tag="mapped">::ffff:180.253.57.249</value>
<value tag="mapped">::ffff:14.204.253.158</value>
<value tag="mapped">::ffff:1.83.241.116</value>
<value tag="mapped">::ffff:202.198.37.147</value>
<value tag="mapped">::ffff:115.6.31.95</value>
<value tag="mapped">::ffff:117.32.14.179</value>
<value tag="mapped">::ffff:23.238.237.26</value>
<value tag="mapped">::ffff:116.97.76.104</value>
<value tag="mapped">::ffff:1.80.2.248</value>
<value tag="mapped">::ffff:59.50.185.152</value>
<value tag="mapped">::ffff:42.117.228.166</value>
<value tag="mapped">::ffff:119.36.22.147</value>
<value tag="mapped">::ffff:210.66.18.184</value>
<value tag="mapped">::ffff:115.19.192.159</value>
<value tag="mapped">::ffff:112.15.128.113</value>
<value tag="mapped">::ffff:1.55.138.211</value>
<value tag="mapped">::ffff:210.183.19.113</value>
<value tag="mapped">::ffff:42.115.43.114</value>
<value tag="mapped">::ffff:58.16.171.31</value>
<value tag="mapped">::ffff:171.234.78.185</value>
<value tag="mapped">::ffff:113.56.43.134</value>
<value tag="mapped">::ffff:111.53.182.225</value>
<value tag="mapped">::ffff:107.160.215.141</value>
<value tag="mapped">::ffff:171.229.231.90</value>
<value tag="mapped">::ffff:58.19.84.138</value>
<value tag="mapped">::ffff:36.79.88.107</value>
<fill_query> INSERT INTO ips_v6 VALUES ('2606:2b00::1')('2001:2000:3080:1351::2')('2a01:8840:16::1')('2001:550:0:1000::9a36:2a61')('2001:578:400:4:2000::19')('2607:f290::1')('2a02:23f0:ffff:8::5')('2400:c700:0:158::')('2001:67c:24e4:4::250')('2a02:2a38:37:5::2')('2001:41a8:400:2::13a')('2405:9800:9800:66::2')('2a07:a343:f210::1')('2403:5000:171:46::2')('2800:c20:1141::8')('2402:7800:40:2::62')('2a00:de00::1')('2001:688:0:2:1::9e')('2001:2000:3080:80::2')('2001:428::205:171:200:230')('2001:fb1:fe0:9::8')('2001:2000:3080:10ca::2')('2400:dd0b:1003::2')('2001:1a98:6677::9d9d:140a')('2001:2000:3018:3b::1')('2607:fa70:3:33::2')('2001:5b0:23ff:fffa::113')('2001:450:2001:1000:0:40:6924:23')('2001:418:0:5000::c2d')('2a01:b740:a09::1')('2607:f0d0:2:2::243')('2a01:348::e:1:1')('2405:4800::3221:3621:2')('2a02:aa08:e000:3100::2')('2001:44c8:129:2632:33:0:252:2')('2a02:e980:1e::1')('2a0a:6f40:2::1')('2001:550:2:29::2c9:1')('2001:c20:4800::175')('2c0f:feb0:1:2::d1')('2a0b:7086:fff0::1')('2a04:2dc0::16d')('2604:7e00::105d')('2001:470:1:946::2')('2a0c:3240::1')('2800:630:4010:8::2')('2001:1af8:4040::12')('2c0f:fc98:1200::2')('2001:470:1:248::2')('2620:44:a000::1')('2402:800:63ff:40::1')('2a02:b000:fff::524')('2001:470:0:327::1')('2401:7400:8888:2::8')('2001:500:55::1')('2001:668:0:3::f000:c2')('2400:bf40::1')('2001:67c:754::1')('2402:28c0:100:ffff:ffff:ffff:ffff:ffff')('2001:470:0:1fa::2')('2001:550:0:1000::9a18:292a')('2001:470:1:89e::2')('2001:579:6f05:500:9934:5b3e:b7fe:1447')('2804:158c::1')('2600:140e:6::1')('2a00:18e0:0:bb04::82')('2a02:2698:5000::1e06')('2402:800:63ff:10::7:2')('2a02:e980:19::1')('2001:4888::342:1:0:0')('2607:fc68:0:4:0:2:2711:21')('2606:2800:602a::1')('2404:c600:1000:2::1d1')('2001:578:1400:4::9d')('2804:64:0:25::1')('2605:3e00::1:2:2')('2c0f:fa18:0:4::b')('2606:2800:602c:b::d004')('2610:18:181:4000::66')('2001:48f8:1000:1::16')('2408:8000:c000::1')('2a03:4200:441:2::4e')('2400:dd00:1:200a::2')('2a02:e980:83:5b09:ecb8:c669:b336:650e')('2001:16a0:2:200a::2')('2001:4888:1f:e891:161:26::')('2a0c:f743::1')('2a02:e980:b::1')('2001:578:201:1::601:9')('2001:438:ffff::407d:1bc1')('2001:920:1833::1')('2001:1b70:a1:610::b102:2')('2001:13c7:6014::1')('2003:0:1203:4001::1')('2804:a8:2:c8::d6')('2a02:2e00:2080:f000:0:261:1:11')('2001:578:20::d')('2001:550:2:48::34:1')('2a03:9d40:fe00:5::')('2403:e800:200:102::2')('::ffff:113.56.44.105')('::ffff:116.66.238.92')('::ffff:67.22.254.206')('::ffff:115.0.24.191')('::ffff:182.30.107.86')('::ffff:223.73.153.243')('::ffff:115.159.103.38')('::ffff:36.186.75.121')('::ffff:111.56.188.125')('::ffff:115.14.93.25')('::ffff:211.97.110.141')('::ffff:61.58.96.173')('::ffff:203.126.212.37')('::ffff:192.220.125.142')('::ffff:115.22.20.223')('::ffff:121.25.160.80')('::ffff:117.150.98.199')('::ffff:183.211.172.143')('::ffff:180.244.18.143')('::ffff:209.131.3.252')('::ffff:220.200.1.22')('::ffff:171.225.130.45')('::ffff:115.4.78.200')('::ffff:36.183.59.29')('::ffff:218.42.159.17')('::ffff:115.13.39.164')('::ffff:142.254.161.133')('::ffff:116.2.211.43')('::ffff:36.183.126.25')('::ffff:66.150.171.196')('::ffff:104.149.148.137')('::ffff:120.239.82.212')('::ffff:111.14.182.156')('::ffff:115.6.63.224')('::ffff:153.35.83.233')('::ffff:113.142.1.1')('::ffff:121.25.82.29')('::ffff:62.151.203.189')('::ffff:104.27.46.146')('::ffff:36.189.46.88')('::ffff:116.252.54.207')('::ffff:64.77.240.1')('::ffff:142.252.102.78')('::ffff:36.82.224.170')('::ffff:117.33.191.217')('::ffff:144.12.164.251')('::ffff:122.10.93.66')('::ffff:104.25.84.59')('::ffff:111.4.242.106')('::ffff:222.216.51.186')('::ffff:112.33.13.212')('::ffff:115.9.240.116')('::ffff:171.228.0.153')('::ffff:45.3.47.158')('::ffff:69.57.193.230')('::ffff:115.6.104.199')('::ffff:104.24.237.140')('::ffff:199.17.84.108')('::ffff:120.193.17.57')('::ffff:112.40.38.145')('::ffff:67.55.90.43')('::ffff:180.253.57.249')('::ffff:14.204.253.158')('::ffff:1.83.241.116')('::ffff:202.198.37.147')('::ffff:115.6.31.95')('::ffff:117.32.14.179')('::ffff:23.238.237.26')('::ffff:116.97.76.104')('::ffff:1.80.2.248')('::ffff:59.50.185.152')('::ffff:42.117.228.166')('::ffff:119.36.22.147')('::ffff:210.66.18.184')('::ffff:115.19.192.159')('::ffff:112.15.128.113')('::ffff:1.55.138.211')('::ffff:210.183.19.113')('::ffff:42.115.43.114')('::ffff:58.16.171.31')('::ffff:171.234.78.185')('::ffff:113.56.43.134')('::ffff:111.53.182.225')('::ffff:107.160.215.141')('::ffff:171.229.231.90')('::ffff:58.19.84.138')('::ffff:36.79.88.107')</fill_query>
<!-- invalid values -->
<value tag="error"></value>
<value tag="error"> </value>
<value tag="error">1</value>
<value tag="error">1.</value>
<value tag="error">1.2.</value>
<value tag="error">.2.</value>
<value tag="error">abc</value>
<value tag="error">ab:cd:ef:gh:ij:kl:mn</value>
<value tag="error">ffffffffffffff</value>
<value tag="error">abcdefghijklmn</value>
<value tag="error">::::::::::::::</value>
<value tag="error">::ffff:127.0.0.1 </value>
<value tag="error"> ::ffff:127.0.0.1</value>
<value tag="error">::ffff:999.999.999.999</value>
</values>
</substitution>
</substitutions>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<query tag="IPv6StringToNum">SELECT count() FROM system.numbers WHERE NOT ignore(IPv6StringToNum(materialize('{ipv6_string}')))</query>
<query tag="IPv6NumToString+IPv6StringToNum">SELECT count() FROM system.numbers WHERE NOT ignore(IPv6NumToString(IPv6StringToNum(materialize('{ipv6_string}'))))</query>
</test>
<query tag="IPv6StringToNum">SELECT count() FROM ips_v6 WHERE NOT ignore(IPv6StringToNum(materialize(ip)))</query>
<query tag="IPv6NumToString+IPv6StringToNum">SELECT count() FROM ips_v6 WHERE NOT ignore(IPv6NumToString(IPv6StringToNum(materialize(ip))))</query>
<drop_query>DROP TABLE IF EXISTS ips_v6</drop_query>
</test>

View File

@ -4,8 +4,6 @@
<create_query>CREATE TABLE IF NOT EXISTS whitespaces(value String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple()</create_query>
<fill_query> INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)</fill_query>
<fill_query> INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)</fill_query>
<fill_query> INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)</fill_query>
<stop_conditions>
<all_of>

View File

@ -9,11 +9,11 @@
<stop_conditions>
<all_of>
<total_time_ms>10000</total_time_ms>
<total_time_ms>8000</total_time_ms>
</all_of>
<any_of>
<average_speed_not_changing_for_ms>5000</average_speed_not_changing_for_ms>
<total_time_ms>20000</total_time_ms>
<total_time_ms>15000</total_time_ms>
</any_of>
</stop_conditions>
@ -22,7 +22,7 @@
</main_metric>
<settings>
<max_memory_usage>20000000000</max_memory_usage>
<max_memory_usage>30000000000</max_memory_usage>
</settings>
<substitutions>

View File

@ -83,6 +83,48 @@
2019-02-06 19:00:00
2019-02-07 04:00:00
2019-02-06 11:00:00
2019-01-01
2018-01-01
2015-01-01
2019-01-01
2019-01-01
2018-10-01
2019-02-01
2019-01-01
2018-10-01
2019-02-04
2019-01-28
2018-12-31
2019-02-06 00:00:00
2019-02-05 00:00:00
2019-02-03 00:00:00
2019-02-06 22:00:00
2019-02-06 21:00:00
2019-02-06 21:00:00
2019-02-06 03:00:00
2019-02-06 22:57:00
2019-02-06 22:56:00
2019-02-06 22:55:00
2019-02-06 22:40:00
2019-02-06 22:30:00
2019-02-06 22:57:35
2019-02-06 22:57:34
2019-02-06 22:57:35
2019-01-01
2018-01-01
2015-01-01
2019-01-01
2019-01-01
2018-10-01
2019-02-01
2019-01-01
2018-10-01
2019-02-04
2019-01-28
2018-12-31
2019-02-06 00:00:00
2019-02-05 00:00:00
2019-02-03 00:00:00
44
44
44

View File

@ -142,6 +142,50 @@ SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/London'), 'Europe/
SELECT toString(toStartOfHour(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo');
SELECT toString(toStartOfHour(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Pacific/Pitcairn');
/* toStartOfInterval */
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 year, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 year, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 year, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 quarter, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 quarter, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 3 quarter, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 month, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 month, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 month, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 week, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 week, 'Europe/Moscow');
SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 6 week, 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 day, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 day, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 day, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 hour, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 hour, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 6 hour, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 24 hour, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 minute, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 minute, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 minute, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 20 minute, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 90 minute, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 second, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 second, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 second, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toStartOfInterval(toDate(17933), INTERVAL 1 year);
SELECT toStartOfInterval(toDate(17933), INTERVAL 2 year);
SELECT toStartOfInterval(toDate(17933), INTERVAL 5 year);
SELECT toStartOfInterval(toDate(17933), INTERVAL 1 quarter);
SELECT toStartOfInterval(toDate(17933), INTERVAL 2 quarter);
SELECT toStartOfInterval(toDate(17933), INTERVAL 3 quarter);
SELECT toStartOfInterval(toDate(17933), INTERVAL 1 month);
SELECT toStartOfInterval(toDate(17933), INTERVAL 2 month);
SELECT toStartOfInterval(toDate(17933), INTERVAL 5 month);
SELECT toStartOfInterval(toDate(17933), INTERVAL 1 week);
SELECT toStartOfInterval(toDate(17933), INTERVAL 2 week);
SELECT toStartOfInterval(toDate(17933), INTERVAL 6 week);
SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 1 day, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 2 day, 'Europe/Moscow'), 'Europe/Moscow');
SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 5 day, 'Europe/Moscow'), 'Europe/Moscow');
/* toRelativeYearNum */
SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeYearNum(toDateTime(0), 'Europe/Moscow');

View File

@ -1,5 +1,5 @@
SET compile_expressions = 1;
SET min_count_to_compile = 1;
SET min_count_to_compile_expression = 1;
SET optimize_move_to_prewhere = 0;
SET enable_optimize_predicate_expression=0;

View File

@ -1,5 +1,5 @@
SET compile_expressions = 1;
SET min_count_to_compile = 1;
SET min_count_to_compile_expression = 1;
DROP TABLE IF EXISTS test.time_table;

View File

@ -11,4 +11,12 @@ $CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep "Ma
$CLICKHOUSE_CLIENT -q "select POSITIO(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select fount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" | grep "Maybe you meant: \['round'" | grep "Or unknown aggregate function" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep -v "Or unknown aggregate function" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow','cos'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select getColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select gutColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select gupColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select provideColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiposicionutf7('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionUTF8','multiPosition'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiposicionutf7casesensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitive'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiposicionutf7sensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitive'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiPosicionSensitiveUTF8('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitiveUTF8'\]" &>/dev/null;

View File

@ -0,0 +1,5 @@
CREATE TEMPORARY TABLE t1 (x UInt8);
INSERT INTO t1 VALUES (1);
SELECT * FROM t1;
CREATE TEMPORARY TABLE test.t2 (x UInt8); -- { serverError 442 }

View File

@ -0,0 +1 @@
SELECT arrayWithConstant(-231.37104, -138); -- { serverError 128 }

View File

@ -0,0 +1,4 @@
2019-02-06
2019-02-07
2019-02-08
2021-02-06

View File

@ -0,0 +1,14 @@
SET compile_expressions = 1;
SET min_count_to_compile_expression = 1;
DROP TABLE IF EXISTS test.foo_c;
CREATE TABLE test.foo_c(d DateTime) ENGINE = Memory;
INSERT INTO test.foo_c VALUES ('2019-02-06 01:01:01'),('2019-02-07 01:01:01'),('2019-02-08 01:01:01'),('2021-02-06 01:01:01'),('2093-05-29 01:01:01'),('2100-06-06 01:01:01'),('2100-10-14 01:01:01'),('2100-11-01 01:01:01'),('2100-11-15 01:01:01'),('2100-11-30 01:01:01'),('2100-12-11 01:01:01'),('2100-12-21 01:01:01');
SELECT toDate(d) AS dd FROM test.foo_c WHERE (dd >= '2019-02-06') AND (toDate(d) <= toDate('2019-08-09')) GROUP BY dd ORDER BY dd;
SELECT toDate(d) FROM test.foo_c WHERE (d > toDate('2019-02-10')) AND (d <= toDate('2022-01-01')) ORDER BY d;
DROP TABLE IF EXISTS test.foo_c;

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (19.2.0) unstable; urgency=low
clickhouse (19.3.0) unstable; urgency=low
* Modified source code
-- <root@yandex-team.ru> Sat, 09 Feb 2019 14:13:07 +0300
-- <root@yandex-team.ru> Mon, 11 Feb 2019 18:13:23 +0300

View File

@ -71,8 +71,8 @@ Please fix this and reinstall this package." >&2
exit 1
fi
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ] && [ -f "$CLICKHOUSE_CONFIG" ]; then
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") ||:
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR}

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.2.0
ARG version=19.3.0
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.2.0
ARG version=19.3.0
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.2.0
ARG version=19.3.0
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -31,7 +31,7 @@ Ok.
0 rows in set. Elapsed: 0.012 sec.
:) INSERT INTO t_null VALUES (1, NULL)
:) INSERT INTO t_null VALUES (1, NULL), (2, 3)
INSERT INTO t_null VALUES

View File

@ -2,16 +2,16 @@
You can monitor:
- Hardware resources utilization.
- Utilization of hardware resources.
- ClickHouse server metrics.
## Resources Utilization
## Resource Utilization
ClickHouse does not monitor the state of hardware resources by itself.
It is highly recommended to set up monitoring for:
- Processors load and temperature.
- Load and temperature on processors.
You can use [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) or other instruments.
@ -26,12 +26,12 @@ To track server events use server logs. See the [logger](#server_settings-logger
ClickHouse collects:
- Different metrics of how the server uses computational resources.
- Common statistics of queries processing.
- Common statistics on query processing.
You can find metrics in tables [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics).
You can find metrics in the [system.metrics](#system_tables-metrics), [system.events](#system_tables-events), and [system.asynchronous_metrics](#system_tables-asynchronous_metrics) tables.
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) of ClickHouse server configuration file. Before configuring metrics export, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html.
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html.
Also, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/`. If server available, it answers `200 OK`.
Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/`. If the server is available, it responds with `200 OK`.
To monitor servers in a cluster configuration, you should set [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use HTTP resource `/replicas-delay`. Request to `/replicas-delay` returns `200 OK` if the replica is available and does not delay behind others. If replica delays, it returns the information about the gap.
To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas-delay`. A request to `/replicas-delay` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns information about the gap.
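For example, the metrics tables mentioned above can be inspected directly with SQL; a minimal sketch (the output depends on the running server):
```sql
-- Current server-level metrics, accumulated event counters, and background metrics.
SELECT metric, value FROM system.metrics LIMIT 5;
SELECT event, value FROM system.events LIMIT 5;
SELECT metric, value FROM system.asynchronous_metrics LIMIT 5;
```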

View File

@ -2,20 +2,20 @@
## CPU
In case of installation from prebuilt deb-packages use CPU with x86_64 architecture and SSE 4.2 instructions support. To run ClickHouse with processors than does not support SSE 4.2 or has AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse on processors that do not support SSE 4.2 or that have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficient at configurations with a large number of cores but lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable than 8 cores with 3600 MHz.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently on configurations with a large number of cores but a lower clock rate than on configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
Use of **Turbo Boost** and **hyper-threading** technologies is recommended. It significantly improves performance with a typical load.
## RAM
We recommend to use 4GB of RAM as minimum to be able to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for queries processing.
We recommend using a minimum of 4 GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
The required volume of RAM depends on:
- The complexity of queries.
- Amount of the data, that processed in queries.
- The amount of data that is processed in queries.
To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and other operations you use.
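One hedged way to get such an estimate is to run a representative query and look at its peak memory usage in the query log; a minimal sketch, assuming `log_queries = 1` so that `system.query_log` is populated:
```sql
-- Peak memory of recently finished queries (sketch).
SELECT query, formatReadableSize(memory_usage) AS peak_memory
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 10;
```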
@ -31,24 +31,24 @@ You need to have 2GB of free disk space to install ClickHouse.
The volume of storage required for your data should be calculated separately. Assessment should include:
- Estimation of a data volume.
- Estimation of the data volume.
You can take the sample of the data and get an average size of a row from it. Then multiply the value with a number of rows you plan to store.
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
- Data compression coefficient.
- The data compression coefficient.
To estimate the data compression coefficient, load some sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, clickstream data are usually compressed by 6-10 times.
To estimate the data compression coefficient, load a sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply estimated volume with the number of replicas.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
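As a rough sketch of estimating the compression coefficient on data that is already loaded (the `default.hits` names are placeholders; assumes a MergeTree-family table and the standard `system.parts` columns):
```sql
-- Compression ratio for one table: uncompressed bytes / compressed bytes.
SELECT sum(data_uncompressed_bytes) / sum(data_compressed_bytes) AS compression_ratio
FROM system.parts
WHERE active AND database = 'default' AND table = 'hits';
```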
## Network
If possible, use networks of 10G of higher class.
If possible, use networks of 10G or higher class.
A bandwidth of the network is critical for processing of distributed queries with a large amount of intermediate data. Also, network speed affects replication processes.
The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. In addition, network speed affects replication processes.
## Software
ClickHouse is developed for Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
ClickHouse is developed for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
ClickHouse also can work in other families of operating systems. See details in [Getting started](../getting_started/index.md) section of the documentation.
ClickHouse can also work in other operating system families. See details in the [Getting started](../getting_started/index.md) section of the documentation.

View File

@ -1,8 +1,8 @@
# StripeLog
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article.
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article.
Use this engine in scenarios, when you need to write many tables with the small amount of data (less than 1 million rows).
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).
## Creating a Table {#table_engines-stripelog-creating-a-table}
@ -15,13 +15,13 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = StripeLog
```
See the detailed description of [CREATE TABLE](../../query_language/create.md#create-table-query) query.
See the detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query.
## Writing the Data {#table_engines-stripelog-writing-the-data}
The `StripeLog` engine stores all the columns in one file. The `Log` and `TinyLog` engines store columns in separate files. For each `INSERT` query, ClickHouse appends data block to the end of a table file, writing columns one by one.
The `StripeLog` engine stores all the columns in one file. For each `INSERT` query, ClickHouse appends the data block to the end of a table file, writing columns one by one.
For each table ClickHouse writes two files:
For each table, ClickHouse writes the following files:
- `data.bin` — Data file.
- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted.
@ -30,7 +30,7 @@ The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` op
## Reading the Data {#table_engines-stripelog-reading-the-data}
File with marks allows ClickHouse parallelize the reading of data. This means that `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows.
The file with marks allows ClickHouse to parallelize the reading of data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows.
## Example of Use {#table_engines-stripelog-example-of-use}
@ -53,9 +53,9 @@ INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The first regular message'
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The second regular message'),(now(),'WARNING','The first warning message')
```
We used two `INSERT` queries to create two data block inside the `data.bin` file.
We used two `INSERT` queries to create two data blocks inside the `data.bin` file.
When selecting data, ClickHouse uses multiple threads. Each thread reads the separate data block and returns resulting rows independently as it finished. It causes that the order of blocks of rows in the output does not match the order of the same blocks in the input in the most cases. For example:
ClickHouse uses multiple threads when selecting data. Each thread reads a separate data block and returns resulting rows independently as it finishes. As a result, the order of blocks of rows in the output does not match the order of the same blocks in the input in most cases. For example:
```sql
SELECT * FROM stripe_log_table

View File

@ -15,9 +15,9 @@ The restriction can also be set externally at the datacenter level. You can use
## RAM
For small amounts of data (up to \~200 GB compressed), it is best to use as much memory as the volume of data.
For small amounts of data (up to ~200 GB compressed), it is best to use as much memory as the volume of data.
For large amounts of data and when processing interactive (online) queries, you should use a reasonable amount of RAM (128 GB or more) so the hot data subset will fit in the cache of pages.
Even for data volumes of \~50 TB per server, using 128 GB of RAM significantly improves query performance compared to 64 GB.
Even for data volumes of ~50 TB per server, using 128 GB of RAM significantly improves query performance compared to 64 GB.
Do not disable overcommit. The value `cat /proc/sys/vm/overcommit_memory` should be 0 or 1. Run
```

View File

@ -2,15 +2,15 @@
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
- [Queries processing](#troubleshooting-does-not-process-queries)
- [Efficiency of queries processing](#troubleshooting-too-slow)
- [Query processing](#troubleshooting-does-not-process-queries)
- [Efficiency of query processing](#troubleshooting-too-slow)
## Installation {#troubleshooting-installation-errors}
### You Can Not Get Deb-packages from ClickHouse Repository With apt-get
### You Cannot Get Deb Packages from ClickHouse Repository With apt-get
- Check firewall settings.
- If you can not access the repository by any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually with `sudo dpkg -i <packages>` command. Also, you need `tzdata` package.
- If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package.
## Connecting to the Server {#troubleshooting-accepts-no-connections}
@ -40,24 +40,24 @@ sudo service clickhouse-server start
The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse-server.log` by default.
In case of successful start you should see the strings:
If the server started successfully, you should see the strings:
- `<Information> Application: starting up.` — Server started to run.
- `<Information> Application: Ready for connections.` — Server runs and ready for connections.
- `<Information> Application: starting up.` — Server started.
- `<Information> Application: Ready for connections.` — Server is running and ready for connections.
If `clickhouse-server` start failed by the configuration error you should see the `<Error>` string with an error description. For example:
If `clickhouse-server` failed to start because of a configuration error, you should see the `<Error>` string with an error description. For example:
```
2019.01.11 15:23:25.549505 [ 45 ] {} <Error> ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused
```
If you don't see an error at the end of file look through all the file from the string:
If you don't see an error at the end of the file, look through the entire file starting from the string:
```
<Information> Application: starting up.
```
If you try to start the second instance of `clickhouse-server` at the server you see the following log:
If you try to start a second instance of `clickhouse-server` on the server, you see the following log:
```
2019.01.11 15:25:11.151730 [ 1 ] {} <Information> : Starting ClickHouse 19.1.0 with revision 54413
@ -75,7 +75,7 @@ Revision: 54413
**See system.d logs**
If there is no any useful information in `clickhouse-server` logs or there is no any logs, you can see `system.d` logs by the command:
If you don't find any useful information in `clickhouse-server` logs or there aren't any logs, you can view `system.d` logs using the command:
```
sudo journalctl -u clickhouse-server
@ -87,7 +87,7 @@ sudo journalctl -u clickhouse-server
sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml
```
This command starts the server as an interactive app with standard parameters of autostart script. In this mode `clickhouse-server` prints all the event messages into the console.
This command starts the server as an interactive app with standard parameters of the autostart script. In this mode `clickhouse-server` prints all the event messages in the console.
### Configuration Parameters
@ -95,7 +95,7 @@ Check:
- Docker settings.
If you run ClickHouse in Docker in IPv6 network, make sure that `network=host` is set.
If you run ClickHouse in Docker in an IPv6 network, make sure that `network=host` is set.
- Endpoint settings.
@ -105,7 +105,7 @@ Check:
- HTTP protocol settings.
Check protocol settings for HTTP API.
Check protocol settings for the HTTP API.
- Secure connection settings.
@ -114,27 +114,27 @@ Check:
- The `tcp_port_secure` setting.
- Settings for SSL sertificates.
Use proper parameters while connecting. For example, use parameter `port_secure` with `clickhouse_client`.
Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`.
- User settings.
You may use the wrong user name or password for it.
You might be using the wrong user name or password.
## Queries Processing {#troubleshooting-does-not-process-queries}
## Query Processing {#troubleshooting-does-not-process-queries}
If ClickHouse can not process the query, it sends the description of an error to the client. In the `clickhouse-client` you get a description of an error in console. If you use HTTP interface, ClickHouse sends error description in response body. For example,
If ClickHouse is not able to process the query, it sends an error description to the client. In the `clickhouse-client` you get a description of the error in the console. If you are using the HTTP interface, ClickHouse sends the error description in the response body. For example:
```bash
$ curl 'http://localhost:8123/' --data-binary "SELECT a"
Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there are no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception
```
If you start `clickhouse-client` with `stack-trace` parameter, ClickHouse returns server stack trace with the description of an error.
If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse returns the server stack trace with the description of an error.
It is possible that you see the message of connection broken. In this case, you can repeat query. If connection brakes any time you perform the query you should check the server logs for errors.
You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors.
## Efficiency of Queries Processing {#troubleshooting-too-slow}
## Efficiency of Query Processing {#troubleshooting-too-slow}
If you see that ClickHouse works too slow, you need to profile the load of the server resources and network for your queries.
If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries.
You can use clickhouse-benchmark utility to profile queries. It shows the number of queries processed in a second, the number of rows processed in a second and percentiles of query processing times.
You can use the clickhouse-benchmark utility to profile queries. It shows the number of queries processed per second, the number of rows processed per second, and percentiles of query processing times.
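As an alternative sketch using only SQL, the slowest recent queries can be pulled from the query log (assumes `log_queries = 1`):
```sql
-- Slowest recently finished queries.
SELECT query_duration_ms, read_rows, query
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY query_duration_ms DESC
LIMIT 10;
```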

View File

@ -1,6 +1,6 @@
# ClickHouse Update
If ClickHouse is installed from deb-packages, execute the following commands on the server:
If ClickHouse was installed from deb packages, execute the following commands on the server:
```
sudo apt-get update
@ -8,6 +8,6 @@ sudo apt-get install clickhouse-client clickhouse-server
sudo service clickhouse-server restart
```
If you installed ClickHouse not from recommended deb-packages, use corresponding methods of update.
If you installed ClickHouse using something other than the recommended deb packages, use the appropriate update method.
ClickHouse does not support a distributed update. The operation should be performed consecutively at each separate server. Do not update all the servers on cluster at the same time, otherwise cluster became unavailable for some time.
ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time.
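A small hedged check that is useful around upgrades is confirming which version each server is actually running, for example:
```sql
-- Report the version of the server you are connected to.
SELECT hostName() AS host, version() AS version;
```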

View File

@ -488,13 +488,13 @@ SELECT arrayDifference([1, 2, 3, 4])
Takes an array, returns an array containing the different elements in all the arrays. For example:
```sql
SELECT arrayDifference([1, 2, 3, 4])
SELECT arrayDistinct([1, 2, 2, 3, 1])
```
```
┌─arrayDifference([1, 2, 3, 4])─┐
│ [0,1,1,1]
└───────────────────────────────┘
┌─arrayDistinct([1, 2, 2, 3, 1])─┐
│ [1,2,3]
└───────────────────────────────
```
## arrayEnumerateDense(arr)

View File

@ -1,7 +1,6 @@
# Functions for searching strings
# Functions for Searching Strings
The search is case-sensitive in all these functions.
The search substring or regular expression must be a constant in all these functions.
The search is case-sensitive by default in all these functions. There are separate variants for case-insensitive search.
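A minimal sketch of the difference, assuming the case-insensitive variant `positionCaseInsensitive` available alongside `position`:
```sql
SELECT
    position('Hello, world!', 'WORLD')                AS case_sensitive,   -- 0: not found
    positionCaseInsensitive('Hello, world!', 'WORLD') AS case_insensitive; -- 8: found
```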
## position(haystack, needle), locate(haystack, needle)

View File

@ -242,7 +242,7 @@ Prints a table containing the columns:
**query_id** The query identifier. Non-empty only if it was explicitly defined by the user. For distributed processing, the query ID is not passed to remote servers.
This query is identical to: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`.
This query is nearly identical to: `SELECT * FROM system.processes`. The difference is that the `SHOW PROCESSLIST` query does not show itself in the list, whereas the `SELECT .. FROM system.processes` query does.
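A minimal sketch of the two forms side by side:
```sql
SHOW PROCESSLIST;                    -- does not list itself
SELECT query FROM system.processes;  -- lists itself as a running query
```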
Tip (execute in the console):

View File

@ -1,3 +1,19 @@
## Fixed in ClickHouse Release 18.12.13, 2018-09-10
### CVE-2018-14672
Functions for loading CatBoost models allowed path traversal and reading arbitrary files through error messages.
Credits: Andrey Krasichkov of Yandex Information Security Team
## Fixed in ClickHouse Release 18.10.3, 2018-08-13
### CVE-2018-14671
unixODBC allowed loading arbitrary shared objects from the file system, which led to a Remote Code Execution vulnerability.
Credits: Andrey Krasichkov and Evgeny Sidorov of Yandex Information Security Team
## Fixed in ClickHouse Release 1.1.54388, 2018-06-28
### CVE-2018-14668

View File

@ -38,7 +38,7 @@ Ok.
0 rows in set. Elapsed: 0.012 sec.
:) INSERT INTO t_null VALUES (1, NULL)
:) INSERT INTO t_null VALUES (1, NULL), (2, 3)
INSERT INTO t_null VALUES

View File

@ -1,7 +1,9 @@
# Log
Отличается от TinyLog тем, что вместе с файлами столбцов лежит небольшой файл "засечек". Засечки пишутся на каждый блок данных и содержат смещение - с какого места нужно читать файл, чтобы пропустить заданное количество строк. Это позволяет читать данные из таблицы в несколько потоков.
При конкуррентном доступе к данным, чтения могут выполняться одновременно, а записи блокируют чтения и друг друга.
Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](log_family.md).
Отличается от [TinyLog](tinylog.md) тем, что вместе с файлами столбцов лежит небольшой файл "засечек". Засечки пишутся на каждый блок данных и содержат смещение - с какого места нужно читать файл, чтобы пропустить заданное количество строк. Это позволяет читать данные из таблицы в несколько потоков.
При конкурентном доступе к данным, чтения могут выполняться одновременно, а записи блокируют чтения и друг друга.
Движок Log не поддерживает индексы. Также, если при записи в таблицу произошёл сбой, то таблица станет битой, и чтения из неё будут возвращать ошибку. Движок Log подходит для временных данных, write-once таблиц, а также для тестовых и демонстрационных целей.
[Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/table_engines/log/) <!--hide-->

View File

@ -1 +0,0 @@
../../../en/operations/table_engines/log_family.md

View File

@ -0,0 +1,45 @@
# Семейство Log
Движки разработаны для сценариев, когда необходимо записывать много таблиц с небольшим объемом данных (менее 1 миллиона строк).
Движки семейства:
- [StripeLog](stripelog.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
## Общие свойства
Движки:
- Хранят данные на диске.
- Добавляют данные в конец файла при записи.
- Не поддерживают операции [мутации](../../query_language/alter.md#alter-mutations).
- Не поддерживают индексы.
Это означает, что запросы `SELECT` не эффективны для выборки диапазонов данных.
- Записывают данные не атомарно.
Вы можете получить таблицу с повреждёнными данными, если что-то нарушит операцию записи (например, аварийное завершение работы сервера).
## Отличия
Движки `Log` и `StripeLog` поддерживают:
- Блокировки для конкурентного доступа к данным.
Во время выполнения запроса `INSERT` таблица заблокирована и другие запросы на чтение и запись данных ожидают снятия блокировки. При отсутствии запросов на запись данных можно одновременно выполнять любое количество запросов на чтение данных.
- Параллельное чтение данных.
ClickHouse читает данные в несколько потоков. Каждый поток обрабатывает отдельный блок данных.
Движок `Log` сохраняет каждый столбец таблицы в отдельном файле. Движок `StripeLog` хранит все данные в одном файле. Таким образом, движок `StripeLog` использует меньше дескрипторов в операционной системе, а движок `Log` обеспечивает более эффективное считывание данных.
Движок `TinyLog` самый простой в семье и обеспечивает самые низкие функциональность и эффективность. Движок `TinyLog` не поддерживает ни параллельного чтения данных, ни конкурентного доступа к данным. Он хранит каждый столбец в отдельном файле. Движок читает данные медленнее, чем оба других движка с параллельным чтением, и использует почти столько же дескрипторов, сколько и движок `Log`. Его можно использовать в простых сценариях с низкой нагрузкой.
[Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/table_engines/log_family/) <!--hide-->

View File

@ -1 +0,0 @@
../../../en/operations/table_engines/stripelog.md

View File

@ -0,0 +1,88 @@
# StripeLog
Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](log_family.md).
Движок разработан для сценариев, когда необходимо записывать много таблиц с небольшим объемом данных (менее 1 миллиона строк).
## Создание таблицы {#table_engines-stripelog-creating-a-table}
```
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
column1_name [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
column2_name [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = StripeLog
```
Смотрите подробное описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query).
## Запись данных {#table_engines-stripelog-writing-the-data}
Движок `StripeLog` хранит все столбцы в одном файле. При каждом запросе `INSERT`, ClickHouse добавляет блок данных в конец файла таблицы, записывая столбцы один за другим.
Для каждой таблицы ClickHouse записывает файлы:
- `data.bin` — файл с данными.
- `index.mrk` — файл с метками. Метки содержат смещения для каждого столбца каждого вставленного блока данных.
Движок `StripeLog` не поддерживает запросы `ALTER UPDATE` и `ALTER DELETE`.
## Чтение данных {#table_engines-stripelog-reading-the-data}
Файл с метками позволяет ClickHouse распараллеливать чтение данных. Это означает, что запрос `SELECT` возвращает строки в непредсказуемом порядке. Используйте секцию `ORDER BY` для сортировки строк.
## Пример использования {#table_engines-stripelog-example-of-use}
Создание таблицы:
```sql
CREATE TABLE stripe_log_table
(
timestamp DateTime,
message_type String,
message String
)
ENGINE = StripeLog
```
Вставка данных:
```sql
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The first regular message')
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The second regular message'),(now(),'WARNING','The first warning message')
```
Мы использовали два запроса `INSERT` для создания двух блоков данных внутри файла `data.bin`.
ClickHouse использует несколько потоков при выборе данных. Каждый поток считывает отдельный блок данных и возвращает результирующие строки независимо по мере завершения. В результате порядок блоков строк в выходных данных в большинстве случаев не совпадает с порядком тех же блоков во входных данных. Например:
```sql
SELECT * FROM stripe_log_table
```
```
┌───────────timestamp─┬─message_type─┬─message────────────────────┐
│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │
│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │
└─────────────────────┴──────────────┴────────────────────────────┘
┌───────────timestamp─┬─message_type─┬─message───────────────────┐
│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │
└─────────────────────┴──────────────┴───────────────────────────┘
```
Сортировка результатов (по умолчанию по возрастанию):
```sql
SELECT * FROM stripe_log_table ORDER BY timestamp
```
```
┌───────────timestamp─┬─message_type─┬─message────────────────────┐
│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │
│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │
│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │
└─────────────────────┴──────────────┴────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/table_engines/stripelog/) <!--hide-->

View File

@ -1,13 +1,15 @@
# TinyLog
Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](log_family.md).
Самый простой движок таблиц, который хранит данные на диске.
Каждый столбец хранится в отдельном сжатом файле.
При записи, данные дописываются в конец файлов.
Конкурентный доступ к данным никак не ограничивается:
- если вы одновременно читаете из таблицы и в другом запросе пишете в неё, то чтение будет завершено с ошибкой;
- если вы одновременно пишете в таблицу в нескольких запросах, то данные будут битыми.
- если вы одновременно читаете из таблицы и в другом запросе пишете в неё, то чтение будет завершено с ошибкой;
- если вы одновременно пишете в таблицу в нескольких запросах, то данные будут битыми.
Типичный способ использования этой таблицы - это write-once: сначала один раз только пишем данные, а потом сколько угодно читаем.
Запросы выполняются в один поток. То есть, этот движок предназначен для сравнительно маленьких таблиц (рекомендуется до 1 000 000 строк).

View File

@ -15,9 +15,9 @@ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover
## Оперативная память
Для небольших объемов данных (до \~200 Гб в сжатом виде) лучше всего использовать столько памяти не меньше, чем объем данных.
Для небольших объемов данных (до ~200 Гб в сжатом виде) лучше всего использовать столько памяти не меньше, чем объем данных.
Для больших объемов данных, при выполнении интерактивных (онлайн) запросов, стоит использовать разумный объем оперативной памяти (128 Гб или более) для того, чтобы горячее подмножество данных поместилось в кеше страниц.
Даже для объемов данных в \~50 Тб на сервер, использование 128 Гб оперативной памяти намного лучше для производительности выполнения запросов, чем 64 Гб.
Даже для объемов данных в ~50 Тб на сервер, использование 128 Гб оперативной памяти намного лучше для производительности выполнения запросов, чем 64 Гб.
Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 or 1. Выполните:

View File

@ -1,7 +1,6 @@
# Функции поиска в строках
Во всех функциях, поиск регистрозависимый.
Во всех функциях, подстрока для поиска или регулярное выражение, должно быть константой.
Во всех функциях, поиск регистрозависимый по-умолчанию. Существуют варианты функций для регистронезависимого поиска.
## position(haystack, needle)
Поиск подстроки `needle` в строке `haystack`.

View File

@ -221,7 +221,7 @@ SHOW DATABASES [INTO OUTFILE filename] [FORMAT format]
SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
```
Выводит список запросов, выполняющихся в данный момент времени, кроме запросов `SHOW PROCESSLIST`.
Выводит список запросов, выполняющихся в данный момент времени, кроме самих запросов `SHOW PROCESSLIST`.
Выдаёт таблицу, содержащую столбцы:
@ -239,7 +239,7 @@ SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
**query_id** - идентификатор запроса. Непустой, только если был явно задан пользователем. При распределённой обработке запроса идентификатор запроса не передаётся на удалённые серверы.
Запрос полностью аналогичен запросу: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`.
Этот запрос аналогичен запросу `SELECT * FROM system.processes` за тем исключением, что последний отображает список запросов, включая самого себя.
Полезный совет (выполните в консоли):

View File

@ -69,8 +69,8 @@ nav:
- 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md'
- 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md'
- 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md'
- 'Log Family':
- 'Introduction': 'operations/table_engines/log_family.md'
- 'Семейство Log':
- 'Введение': 'operations/table_engines/log_family.md'
- 'StripeLog': 'operations/table_engines/stripelog.md'
- 'Log': 'operations/table_engines/log.md'
- 'TinyLog': 'operations/table_engines/tinylog.md'

View File

@ -31,7 +31,7 @@ def test_single_page(input_path, lang):
logging.info('Link to nowhere: %s' % href)
if duplicate_anchor_points:
logging.error('Found %d duplicate anchor points' % duplicate_anchor_points)
logging.warning('Found %d duplicate anchor points' % duplicate_anchor_points)
if links_to_nowhere:
logging.error('Found %d links to nowhere' % links_to_nowhere)
sys.exit(10)

View File

@ -283,15 +283,7 @@ public:
inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; }
inline time_t toStartOfFiveMinute(time_t t) const { return t / 300 * 300; }
inline time_t toStartOfFifteenMinutes(time_t t) const { return t / 900 * 900; }
inline time_t toStartOfTenMinutes(time_t t) const
{
if (offset_is_whole_number_of_hours_everytime)
return t / 600 * 600;
time_t date = find(t).date;
return date + (t - date) / 600 * 600;
}
inline time_t toStartOfTenMinutes(time_t t) const { return t / 600 * 600; }
inline time_t toStartOfHour(time_t t) const
{
@ -434,6 +426,71 @@ public:
return toRelativeMinuteNum(lut[d].date);
}
inline DayNum toStartOfYearInterval(DayNum d, UInt64 years) const
{
if (years == 1)
return toFirstDayNumOfYear(d);
return years_lut[(lut[d].year - DATE_LUT_MIN_YEAR) / years * years];
}
inline DayNum toStartOfQuarterInterval(DayNum d, UInt64 quarters) const
{
if (quarters == 1)
return toFirstDayNumOfQuarter(d);
return toStartOfMonthInterval(d, quarters * 3);
}
inline DayNum toStartOfMonthInterval(DayNum d, UInt64 months) const
{
if (months == 1)
return toFirstDayNumOfMonth(d);
const auto & date = lut[d];
UInt32 month_total_index = (date.year - DATE_LUT_MIN_YEAR) * 12 + date.month - 1;
return years_months_lut[month_total_index / months * months];
}
inline DayNum toStartOfWeekInterval(DayNum d, UInt64 weeks) const
{
if (weeks == 1)
return toFirstDayNumOfWeek(d);
UInt64 days = weeks * 7;
// January 1st 1970 was a Thursday, so we need this 4-day offset to make weeks start on Monday.
return DayNum(4 + (d - 4) / days * days);
}
inline time_t toStartOfDayInterval(DayNum d, UInt64 days) const
{
if (days == 1)
return toDate(d);
return lut[d / days * days].date;
}
inline time_t toStartOfHourInterval(time_t t, UInt64 hours) const
{
if (hours == 1)
return toStartOfHour(t);
UInt64 seconds = hours * 3600;
t = t / seconds * seconds;
if (offset_is_whole_number_of_hours_everytime)
return t;
return toStartOfHour(t);
}
inline time_t toStartOfMinuteInterval(time_t t, UInt64 minutes) const
{
if (minutes == 1)
return toStartOfMinute(t);
UInt64 seconds = 60 * minutes;
return t / seconds * seconds;
}
inline time_t toStartOfSecondInterval(time_t t, UInt64 seconds) const
{
if (seconds == 1)
return t;
return t / seconds * seconds;
}
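// Note on the *Interval helpers above: each one rounds its argument down to a multiple of the interval
// length. Calendar-based intervals (year, quarter, month, week) are resolved through the lookup tables,
// day intervals round the DayNum, and hour/minute/second intervals use plain integer arithmetic on the
// timestamp; hour intervals fall back to toStartOfHour() when the time zone offset is not a whole
// number of hours.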
/// Create DayNum from year, month, day of month.
inline DayNum makeDayNum(UInt16 year, UInt8 month, UInt8 day_of_month) const
{