Merge branch 'master' of github.com:yandex/ClickHouse

alesapin 2019-01-18 14:48:18 +03:00
commit d8b01bec80
69 changed files with 26160 additions and 940 deletions

View File

@ -1,7 +1,7 @@
# Use Ninja instead of Unix Makefiles by default.
# https://stackoverflow.com/questions/11269833/cmake-selecting-a-generator-within-cmakelists-txt
#
# Reason: it have better startup time than make and it parallelize jobs more uniformly.
# Reason: it has better startup time than make and it parallelizes jobs more uniformly.
# (compared to make with Makefiles that were generated by CMake)
#
# How to install Ninja on Ubuntu:

View File

@ -2,10 +2,10 @@
set(VERSION_REVISION 54413)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 1)
set(VERSION_PATCH 0)
set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067)
set(VERSION_DESCRIBE v19.1.0-testing)
set(VERSION_STRING 19.1.0)
set(VERSION_PATCH 1)
set(VERSION_GITHASH 4e7747117123f5a1b027a64865844b4faa10447d)
set(VERSION_DESCRIBE v19.1.1-testing)
set(VERSION_STRING 19.1.1)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")

View File

@ -235,6 +235,11 @@ private:
actions.clear();
actions.emplace_back(PatternActionType::KleeneStar);
dfa_states.clear();
dfa_states.emplace_back(true);
pattern_has_time = false;
const char * pos = pattern.data();
const char * begin = pos;
const char * end = pos + pattern.size();
@ -285,6 +290,7 @@ private:
actions.back().type != PatternActionType::KleeneStar)
throw Exception{"Temporal condition should be preceded by an event condition", ErrorCodes::BAD_ARGUMENTS};
pattern_has_time = true;
actions.emplace_back(type, duration);
}
else
@ -299,6 +305,9 @@ private:
throw Exception{"Event number " + toString(event_number) + " is out of range", ErrorCodes::BAD_ARGUMENTS};
actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1);
dfa_states.back().transition = DFATransition::SpecificEvent;
dfa_states.back().event = event_number - 1;
dfa_states.emplace_back();
}
if (!match(")"))
@ -306,17 +315,88 @@ private:
}
else if (match(".*"))
{
actions.emplace_back(PatternActionType::KleeneStar);
dfa_states.back().has_kleene = true;
}
else if (match("."))
{
actions.emplace_back(PatternActionType::AnyEvent);
dfa_states.back().transition = DFATransition::AnyEvent;
dfa_states.emplace_back();
}
else
throw_exception("Could not parse pattern, unexpected starting symbol");
}
}
protected:
/// Uses a DFA based approach in order to better handle patterns without
/// time assertions.
///
/// NOTE: This implementation relies on the assumption that the pattern is *small*.
///
/// This algorithm performs in O(mn) time (with m the number of DFA states and n the number
/// of events), with memory consumption and memory allocations in O(m). This means that
/// if n >> m (which is expected to be the case), the algorithm can be considered linear.
template <typename T>
bool match(T & events_it, const T events_end) const
bool dfaMatch(T & events_it, const T events_end) const
{
using ActiveStates = std::vector<bool>;
/// Those two vectors keep track of which states should be considered for the current
/// event as well as the states which should be considered for the next event.
ActiveStates active_states(dfa_states.size(), false);
ActiveStates next_active_states(dfa_states.size(), false);
active_states[0] = true;
/// Keeps track of the number of active states, so that we can stop early instead of
/// iterating over all remaining events once the match has already failed.
size_t n_active = 1;
for (/* empty */; events_it != events_end && n_active > 0 && !active_states.back(); ++events_it)
{
n_active = 0;
next_active_states.assign(dfa_states.size(), false);
for (size_t state = 0; state < dfa_states.size(); ++state)
{
if (!active_states[state])
{
continue;
}
switch (dfa_states[state].transition)
{
case DFATransition::None:
break;
case DFATransition::AnyEvent:
next_active_states[state + 1] = true;
++n_active;
break;
case DFATransition::SpecificEvent:
if (events_it->second.test(dfa_states[state].event))
{
next_active_states[state + 1] = true;
++n_active;
}
break;
}
if (dfa_states[state].has_kleene)
{
next_active_states[state] = true;
++n_active;
}
}
swap(active_states, next_active_states);
}
return active_states.back();
}
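To make the active-state propagation above concrete, here is a minimal standalone sketch (not part of this commit; `dfaMatchSketch`, `State` and `Transition` are made-up names) that simulates a small state chain, roughly what the parser above would build for a pattern like (?1).*(?2), over a sequence of per-timestamp event bitsets:
#include <bitset>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>
enum class Transition { None, SpecificEvent, AnyEvent };
struct State
{
    bool has_kleene = false;    /// the state carries a ".*" self-loop
    uint32_t event = 0;         /// event index for SpecificEvent transitions
    Transition transition = Transition::None;
};
/// Returns true if the event sequence matches the state chain (same propagation as dfaMatch above).
bool dfaMatchSketch(const std::vector<State> & states, const std::vector<std::bitset<32>> & events)
{
    std::vector<bool> active(states.size(), false);
    std::vector<bool> next(states.size(), false);
    active[0] = true;
    size_t n_active = 1;
    for (size_t i = 0; i < events.size() && n_active > 0 && !active.back(); ++i)
    {
        n_active = 0;
        next.assign(states.size(), false);
        for (size_t s = 0; s < states.size(); ++s)
        {
            if (!active[s])
                continue;
            if (states[s].transition == Transition::AnyEvent
                || (states[s].transition == Transition::SpecificEvent && events[i].test(states[s].event)))
            {
                next[s + 1] = true;   /// advance to the following state
                ++n_active;
            }
            if (states[s].has_kleene)
            {
                next[s] = true;       /// ".*" keeps this state alive for the next event
                ++n_active;
            }
        }
        std::swap(active, next);
    }
    return active.back();
}
int main()
{
    /// Roughly "(?1).*(?2)": the first state also carries the implicit leading ".*",
    /// the last state is the accepting one (its transition stays None).
    std::vector<State> states = {
        {true, 0, Transition::SpecificEvent},
        {true, 1, Transition::SpecificEvent},
        {false, 0, Transition::None}};
    /// Three timestamps: event 1 fires, then nothing, then event 2 fires.
    std::vector<std::bitset<32>> events = {std::bitset<32>(0b01), std::bitset<32>(0b00), std::bitset<32>(0b10)};
    std::cout << dfaMatchSketch(states, events) << '\n';   /// prints 1
}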
template <typename T>
bool backtrackingMatch(T & events_it, const T events_end) const
{
const auto action_begin = std::begin(actions);
const auto action_end = std::end(actions);
@ -445,10 +525,53 @@ protected:
return action_it == action_end;
}
private:
enum class DFATransition : char
{
/// .-------.
/// | |
/// `-------'
None,
/// .-------. (?[0-9])
/// | | ----------
/// `-------'
SpecificEvent,
/// .-------. .
/// | | ----------
/// `-------'
AnyEvent,
};
struct DFAState
{
DFAState(bool has_kleene = false)
: has_kleene{has_kleene}, event{0}, transition{DFATransition::None}
{}
/// .-------.
/// | | - - -
/// `-------'
/// |_^
bool has_kleene;
/// In the case of a state transition on a `SpecificEvent`,
/// `event` contains the value of the event.
uint32_t event;
/// The kind of transition out of this state.
DFATransition transition;
};
using DFAStates = std::vector<DFAState>;
protected:
/// `true` if the parsed pattern contains time assertions (?t...), `false` otherwise.
bool pattern_has_time;
private:
std::string pattern;
size_t arg_count;
PatternActions actions;
DFAStates dfa_states;
};
@ -471,7 +594,8 @@ public:
const auto events_end = std::end(data_ref.events_list);
auto events_it = events_begin;
static_cast<ColumnUInt8 &>(to).getData().push_back(match(events_it, events_end));
bool match = pattern_has_time ? backtrackingMatch(events_it, events_end) : dfaMatch(events_it, events_end);
static_cast<ColumnUInt8 &>(to).getData().push_back(match);
}
};
@ -501,7 +625,7 @@ private:
auto events_it = events_begin;
size_t count = 0;
while (events_it != events_end && match(events_it, events_end))
while (events_it != events_end && backtrackingMatch(events_it, events_end))
++count;
return count;

View File

@ -1,15 +1,17 @@
#pragma once
#include <Common/StringSearcher.h>
#include <Common/StringUtils/StringUtils.h>
#include <algorithm>
#include <vector>
#include <stdint.h>
#include <string.h>
#include <Columns/ColumnString.h>
#include <Core/Types.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <Common/StringSearcher.h>
#include <Common/StringUtils/StringUtils.h>
#include <common/StringRef.h>
#include <common/unaligned.h>
#include <ext/range.h>
#include <stdint.h>
#include <string.h>
/** Search for a substring in a string by Volnitsky's algorithm
* http://volnitsky.com/project/str_search/
@ -28,117 +30,38 @@
* - if it did not match, we check the next cell of the hash table from the collision resolution chain;
* - if nothing is found, we skip ahead in the haystack by almost the length of the needle;
*
* Unaligned memory access is used.
* MultiVolnitsky - search for multiple substrings in a string:
* - bigrams are added to the hash table together with the index of the needle; then the usual Volnitsky search is used;
* - needles are added in batches while searching, limiting the number of fallback searchers and the total number of added bigrams.
*/
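As a hedged illustration of the scheme described above (not from this commit; `volnitskyToySearch` is a made-up name, and the sketch skips the case-insensitive variants, the dedicated fallback searchers and the padded-buffer reads used below), a toy version of the bigram hash table and the stepping loop could look like this:
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
const char * volnitskyToySearch(const std::string & haystack, const std::string & needle)
{
    const char * const haystack_end = haystack.data() + haystack.size();
    /// Naive scan, standing in for the fallback searcher of the real implementation.
    auto naive = [&](const char * from)
    {
        for (const char * p = from; p + needle.size() <= haystack_end; ++p)
            if (std::memcmp(p, needle.data(), needle.size()) == 0)
                return p;
        return haystack_end;
    };
    if (needle.size() < 2 || needle.size() >= 255 || haystack.size() < needle.size())
        return naive(haystack.data());
    constexpr size_t hash_size = 64 * 1024;
    std::vector<uint8_t> hash(hash_size, 0);   /// stores offset + 1; 0 means an empty cell
    auto bigram = [](const char * p) { uint16_t n; std::memcpy(&n, p, 2); return n; };
    /// Put every bigram of the needle into the table together with its offset.
    for (size_t i = 0; i + 2 <= needle.size(); ++i)
    {
        size_t cell = bigram(needle.data() + i) % hash_size;
        while (hash[cell])
            cell = (cell + 1) % hash_size;
        hash[cell] = static_cast<uint8_t>(i + 1);
    }
    const size_t step = needle.size() - 2 + 1;
    const char * pos = haystack.data() + needle.size() - 2;
    for (; pos <= haystack_end - needle.size(); pos += step)
    {
        /// Check every cell that may correspond to the bigram currently under `pos`.
        for (size_t cell = bigram(pos) % hash_size; hash[cell]; cell = (cell + 1) % hash_size)
        {
            const char * candidate = pos - (hash[cell] - 1);
            if (std::memcmp(candidate, needle.data(), needle.size()) == 0)
                return candidate;
        }
    }
    return naive(pos - step + 1);   /// the remaining tail
}
int main()
{
    std::string haystack = "search for a substring in a string";
    const char * res = volnitskyToySearch(haystack, "substring");
    std::cout << (res - haystack.data()) << '\n';   /// prints 13
}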
namespace DB
{
/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
template <typename CRTP>
class VolnitskyBase
namespace VolnitskyTraits
{
protected:
using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
using Id = UInt8; /// Index of the string (within the array of multiple needles), must not be greater than 255.
using Ngram = UInt16; /// n-gram (2 bytes).
const UInt8 * const needle;
const size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
/// For how long we move, if the n-gram from haystack is not found in the hash table.
const size_t step = needle_size - sizeof(Ngram) + 1;
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache (of common Intel CPUs).
Offset hash[hash_size]; /// Hash table.
/** Fits into the L2 cache (of common Intel CPUs).
* This number is extremely good for compilers, as it is numeric_limits<UInt16>::max() and there are optimizations with movzwl and other 2-byte instructions.
*/
static constexpr size_t hash_size = 64 * 1024;
/// min haystack size to use main algorithm instead of fallback
static constexpr auto min_haystack_size_for_algorithm = 20000;
const bool fallback; /// Do we need to use the fallback algorithm.
static constexpr size_t min_haystack_size_for_algorithm = 20000;
public:
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
*/
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
: needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
fallback{
needle_size < 2 * sizeof(Ngram)
|| needle_size >= std::numeric_limits<Offset>::max()
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
{
if (fallback)
return;
memset(hash, 0, sizeof(hash));
/// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
self().putNGram(this->needle + i, i + 1, this->needle);
return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits<Offset>::max()
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm);
}
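For intuition, a hypothetical restatement of this heuristic with plain standard types, plus a few concrete data points (the constants come straight from the code above: sizeof(Ngram) == 2, Offset is an 8-bit type, min_haystack_size_for_algorithm == 20000):
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
bool isFallbackNeedleSketch(size_t needle_size, size_t haystack_size_hint = 0)
{
    return needle_size < 2 * sizeof(uint16_t)                      /// too short to produce useful bigrams
        || needle_size >= std::numeric_limits<uint8_t>::max()      /// offsets would not fit into the 8-bit table slots
        || (haystack_size_hint && haystack_size_hint < 20000);     /// haystack too small to amortize the table setup
}
int main()
{
    assert(isFallbackNeedleSketch(3));             /// 3-byte needle: fall back
    assert(!isFallbackNeedleSketch(10));           /// typical needle, no hint: use the hash table
    assert(isFallbackNeedleSketch(10, 1000));      /// tiny expected haystack: fall back
    assert(!isFallbackNeedleSketch(10, 100000));   /// large expected haystack: use the hash table
}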
static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
/// If not found, the end of the haystack is returned.
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
{
if (needle_size == 0)
return haystack;
const auto haystack_end = haystack + haystack_size;
if (needle_size == 1 || fallback || haystack_size <= needle_size)
return self().search_fallback(haystack, haystack_end);
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
const auto * pos = haystack + needle_size - sizeof(Ngram);
for (; pos <= haystack_end - needle_size; pos += step)
{
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
cell_num = (cell_num + 1) % hash_size)
{
/// When found - compare bytewise, using the offset from the hash table.
const auto res = pos - (hash[cell_num] - 1);
if (self().compare(res))
return res;
}
}
/// The remaining tail.
return self().search_fallback(pos - step + 1, haystack_end);
}
const char * search(const char * haystack, size_t haystack_size) const
{
return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
}
protected:
CRTP & self() { return static_cast<CRTP &>(*this); }
const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }
static Ngram toNGram(const UInt8 * const pos)
{
return unalignedLoad<Ngram>(pos);
}
void putNGramBase(const Ngram ngram, const int offset)
{
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
size_t cell_num = ngram % hash_size;
while (hash[cell_num])
cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.
hash[cell_num] = offset;
}
void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset)
template <typename Callback>
static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
{
struct Chars
{
@ -186,74 +109,21 @@ protected:
/// 1 combination: 01
putNGramBase(n, offset);
}
};
template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl;
/// Case sensitive comparison
template <bool ASCII> struct VolnitskyImpl<true, ASCII> : VolnitskyBase<VolnitskyImpl<true, ASCII>>
{
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
: VolnitskyBase<VolnitskyImpl<true, ASCII>>{needle_, needle_size_, haystack_size_hint},
fallback_searcher{needle_, needle_size_}
template <bool CaseSensitive, bool ASCII, typename Callback>
static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
{
}
void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
if constexpr (CaseSensitive)
{
this->putNGramBase(this->toNGram(pos), offset);
putNGramBase(toNGram(pos), offset);
}
bool compare(const UInt8 * const pos) const
else
{
/// @todo: maybe just use memcmp for this case and rely on internal SSE optimization as in case with memcpy?
return fallback_searcher.compare(pos);
}
const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
if constexpr (ASCII)
{
return fallback_searcher.search(haystack, haystack_end);
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
}
ASCIICaseSensitiveStringSearcher fallback_searcher;
};
/// Case-insensitive ASCII
template <> struct VolnitskyImpl<false, true> : VolnitskyBase<VolnitskyImpl<false, true>>
{
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
: VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
{
}
void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
{
putNGramASCIICaseInsensitive(pos, offset);
}
bool compare(const UInt8 * const pos) const
{
return fallback_searcher.compare(pos);
}
const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
{
return fallback_searcher.search(haystack, haystack_end);
}
ASCIICaseInsensitiveStringSearcher fallback_searcher;
};
/// Case-sensitive UTF-8
template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<false, false>>
{
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
: VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
{
}
void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const begin)
else
{
struct Chars
{
@ -263,16 +133,14 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
union
{
Ngram n;
VolnitskyTraits::Ngram n;
Chars chars;
};
n = toNGram(pos);
if (isascii(chars.c0) && isascii(chars.c1))
{
putNGramASCIICaseInsensitive(pos, offset);
}
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
else
{
/** n-gram (in the case of n = 2)
@ -435,25 +303,389 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
}
}
}
}
}
}
bool compare(const UInt8 * const pos) const
/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
class VolnitskyBase
{
protected:
const UInt8 * const needle;
const size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
/// How far we step if the n-gram from the haystack is not found in the hash table.
const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.
const bool fallback; /// Do we need to use the fallback algorithm.
FallbackSearcher fallback_searcher;
public:
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
*/
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
: needle{reinterpret_cast<const UInt8 *>(needle)}
, needle_size{needle_size}
, fallback{VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)}
, fallback_searcher{needle, needle_size}
{
return fallback_searcher.compare(pos);
if (fallback)
return;
memset(hash, 0, sizeof(hash));
auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
/// ssize_t is used here because an unsigned type can't be used with a condition like `i >= 0` (unsigned is always >= 0)
for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
}
const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
/// If not found, the end of the haystack is returned.
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
{
if (needle_size == 0)
return haystack;
const auto haystack_end = haystack + haystack_size;
if (fallback || haystack_size <= needle_size)
return fallback_searcher.search(haystack, haystack_end);
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
const auto * pos = haystack + needle_size - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - needle_size; pos += step)
{
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num];
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
/// When found - compare bytewise, using the offset from the hash table.
const auto res = pos - (hash[cell_num] - 1);
/// the haystack pointer always refers to a padded array, so we can rely on page-safe reads here
if (fallback_searcher.compare(res))
return res;
}
}
UTF8CaseInsensitiveStringSearcher fallback_searcher;
return fallback_searcher.search(pos - step + 1, haystack_end);
}
const char * search(const char * haystack, size_t haystack_size) const
{
return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
}
protected:
void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset)
{
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
size_t cell_num = ngram % VolnitskyTraits::hash_size;
while (hash[cell_num])
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; /// Search for the next free cell.
hash[cell_num] = offset;
}
};
using Volnitsky = VolnitskyImpl<true, true>;
using VolnitskyUTF8 = VolnitskyImpl<true, false>; /// exactly same as Volnitsky
using VolnitskyCaseInsensitive = VolnitskyImpl<false, true>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyImpl<false, false>;
template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
class MultiVolnitskyBase
{
private:
/// needles and their offsets
const std::vector<StringRef> & needles;
/// fallback searchers
std::vector<size_t> fallback_needles;
std::vector<FallbackSearcher> fallback_searchers;
/// because std::pair<> is not POD
struct OffsetId
{
VolnitskyTraits::Id id;
VolnitskyTraits::Offset off;
};
OffsetId hash[VolnitskyTraits::hash_size];
/// step for each bunch of strings
size_t step;
/// index of the first needle that has not been processed yet
size_t last;
/// limit for adding to the hash table. In the worst case of case-insensitive search, the table will be at most half full
static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8;
public:
MultiVolnitskyBase(const std::vector<StringRef> & needles_) : needles{needles_}, step{0}, last{0}
{
fallback_searchers.reserve(needles.size());
}
template <typename ResultType, typename AnsCallback>
void searchAll(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const AnsCallback & ansCallback,
ResultType & ans)
{
const size_t haystack_string_size = haystack_offsets.size();
const size_t needles_size = needles.size();
/// otherwise some of the result values could be left uninitialized
std::fill(ans.begin(), ans.end(), 0);
while (!reset())
{
size_t fallback_size = fallback_needles.size();
size_t prev_offset = 0;
for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size)
{
const auto * haystack = &haystack_data[prev_offset];
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
for (size_t i = 0; i < fallback_size; ++i)
{
const UInt8 * ptr = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end);
if (ptr != haystack_end)
ans[from + fallback_needles[i]] = ansCallback(haystack, ptr);
}
/// check if we have at least one non-empty Volnitsky searcher
if (step != std::numeric_limits<size_t>::max())
{
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
{
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
if (pos >= haystack + hash[cell_num].off - 1)
{
const auto * res = pos - (hash[cell_num].off - 1);
const size_t ind = hash[cell_num].id;
if (ans[from + ind] == 0 && res + needles[ind].size <= haystack_end)
{
if (fallback_searchers[ind].compare(res))
{
ans[from + ind] = ansCallback(haystack, res);
}
}
}
}
}
}
prev_offset = haystack_offsets[j];
}
}
}
template <typename ResultType>
void search(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans)
{
auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> bool
{
return this->searchOne(haystack, haystack_end);
};
searchInternal(haystack_data, haystack_offsets, callback, ans);
}
template <typename ResultType>
void searchIndex(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans)
{
auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t
{
return this->searchOneIndex(haystack, haystack_end);
};
searchInternal(haystack_data, haystack_offsets, callback, ans);
}
private:
/**
* This function initializes the hash table.
* It returns `true` if there is nothing left to initialize,
* and `false` if some needles still had to be added, in which case it (re)initializes the table with them.
* It serves as a kind of fallback when there are many needles:
* we discard the hash table, refill it with the needles that have not been processed yet,
* and search through the haystack again.
* The actual usage of this function is like this:
* while (!reset())
* {
* search inside the haystack with the known needles
* }
*/
bool reset()
{
if (last == needles.size())
return true;
memset(hash, 0, sizeof(hash));
fallback_needles.clear();
step = std::numeric_limits<size_t>::max();
size_t buf = 0;
size_t size = needles.size();
for (; last < size; ++last)
{
const char * cur_needle_data = needles[last].data;
const size_t cur_needle_size = needles[last].size;
/// save the indices of fallback searchers
if (VolnitskyTraits::isFallbackNeedle(cur_needle_size))
{
fallback_needles.push_back(last);
}
else
{
/// put all bigrams
auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset)
{
return this->putNGramBase(ngram, offset, this->last);
};
buf += cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
/// this is the condition when we actually need to stop and start searching with known needles
if (buf > small_limit)
break;
step = std::min(step, cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1);
for (auto i = static_cast<int>(cur_needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
{
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(
reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
i + 1,
reinterpret_cast<const UInt8 *>(cur_needle_data),
callback);
}
}
fallback_searchers.emplace_back(cur_needle_data, cur_needle_size);
}
return false;
}
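A rough back-of-the-envelope sketch of how the batching above plays out (hypothetical needle length; the 8192 figure is just `small_limit = hash_size / 8` from the declarations above):
#include <cstddef>
#include <iostream>
int main()
{
    const size_t hash_size = 64 * 1024;
    const size_t small_limit = hash_size / 8;        /// 8192 bigrams may be added per pass
    const size_t needle_size = 100;                  /// example needle length
    const size_t bigrams_per_needle = needle_size - 2 + 1;
    /// Roughly how many needles of this size fit into one pass before reset() stops,
    /// after which the haystack is rescanned with the next batch of needles.
    std::cout << small_limit / bigrams_per_needle << '\n';   /// prints 82
}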
template <typename OneSearcher, typename ResultType>
inline void searchInternal(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const OneSearcher & searchFallback,
ResultType & ans)
{
const size_t haystack_string_size = haystack_offsets.size();
while (!reset())
{
size_t prev_offset = 0;
for (size_t j = 0; j < haystack_string_size; ++j)
{
const auto * haystack = &haystack_data[prev_offset];
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
ans[j] = searchFallback(haystack, haystack_end);
prev_offset = haystack_offsets[j];
}
}
}
inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const
{
const size_t fallback_size = fallback_needles.size();
for (size_t i = 0; i < fallback_size; ++i)
if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
return true;
/// check if we have at least one non-empty Volnitsky searcher
if (step != std::numeric_limits<size_t>::max())
{
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
{
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
if (pos >= haystack + hash[cell_num].off - 1)
{
const auto res = pos - (hash[cell_num].off - 1);
const size_t ind = hash[cell_num].id;
if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
return true;
}
}
}
}
return false;
}
inline size_t searchOneIndex(const UInt8 * haystack, const UInt8 * haystack_end) const
{
const size_t fallback_size = fallback_needles.size();
size_t ans = std::numeric_limits<size_t>::max();
for (size_t i = 0; i < fallback_size; ++i)
if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
ans = std::min(ans, fallback_needles[i]);
/// check if we have at least one non-empty Volnitsky searcher
if (step != std::numeric_limits<size_t>::max())
{
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
{
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
if (pos >= haystack + hash[cell_num].off - 1)
{
const auto res = pos - (hash[cell_num].off - 1);
const size_t ind = hash[cell_num].id;
if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
ans = std::min(ans, ind);
}
}
}
}
/*
* If nothing was found, `ans` is SIZE_MAX, so `ans + 1` wraps around to zero, which is
* exactly what we want to return, because positions are 1-based and zero means "not found".
*/
return ans + 1;
}
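The `return ans + 1` above leans on well-defined unsigned wrap-around; a tiny standalone illustration (standard types only, not tied to the class):
#include <cassert>
#include <cstddef>
#include <limits>
int main()
{
    size_t ans = std::numeric_limits<size_t>::max();   /// the "nothing found" value used above
    assert(ans + 1 == 0);                              /// wrap-around: not found is reported as 0
    ans = 2;                                           /// the needle with index 2 matched first
    assert(ans + 1 == 3);                              /// reported as a 1-based position
}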
void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num)
{
size_t cell_num = ngram % VolnitskyTraits::hash_size;
while (hash[cell_num].off)
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size;
hash[cell_num] = {static_cast<VolnitskyTraits::Id>(num), static_cast<VolnitskyTraits::Offset>(offset)};
}
};
using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
}
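A minimal usage sketch for the single-needle searcher defined in this header (hedged: it assumes a ClickHouse build where <Common/Volnitsky.h> and its dependencies are available, and it glosses over the fact that the real searchers expect padded buffers such as ClickHouse columns, per the page-safe comment above; the multi-needle variants are exercised through FunctionsStringSearch below):
#include <Common/Volnitsky.h>
#include <iostream>
#include <string>
int main()
{
    std::string haystack = "search for a substring in a string";
    std::string needle = "substring";
    /// The third argument is the expected total haystack size; it lets the searcher
    /// decide whether building the bigram table is worth the setup cost.
    DB::Volnitsky searcher(needle.data(), needle.size(), haystack.size());
    const char * res = searcher.search(haystack.data(), haystack.size());
    if (res != haystack.data() + haystack.size())
        std::cout << "found at offset " << (res - haystack.data()) << '\n';
}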

View File

@ -61,6 +61,7 @@ struct Request
{
Request() = default;
Request(const Request &) = default;
Request & operator=(const Request &) = default;
virtual ~Request() = default;
virtual String getPath() const = 0;
virtual void addRootPath(const String & /* root_path */) {}
@ -76,6 +77,7 @@ struct Response
int32_t error = 0;
Response() = default;
Response(const Response &) = default;
Response & operator=(const Response &) = default;
virtual ~Response() = default;
virtual void removeRootPath(const String & /* root_path */) {}
};

View File

@ -23,6 +23,7 @@ namespace ErrorCodes
extern const int BAD_TYPE_OF_FIELD;
extern const int BAD_ARGUMENTS;
extern const int THERE_IS_NO_COLUMN;
extern const int LOGICAL_ERROR;
}
static String getSchemaPath(const String & schema_dir, const String & schema_file)
@ -39,7 +40,7 @@ CapnProtoRowInputStream::NestedField split(const Block & header, size_t i)
if (name.size() > 0 && name[0] == '.')
name.erase(0, 1);
boost::split(field.tokens, name, boost::is_any_of("."));
boost::split(field.tokens, name, boost::is_any_of("._"));
return field;
}
@ -109,44 +110,62 @@ capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::
throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN);
}
void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader)
{
String last;
size_t level = 0;
capnp::StructSchema::Field parent;
for (const auto & field : sortedFields)
void CapnProtoRowInputStream::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader)
{
/// Columns in a table can map to fields in Cap'n'Proto or to structs.
/// Store common parents and their tokens in order to backtrack.
std::vector<capnp::StructSchema::Field> parents;
std::vector<std::string> parent_tokens;
capnp::StructSchema cur_reader = reader;
for (const auto & field : sorted_fields)
{
// Move to a different field in the same structure, keep parent
if (level > 0 && field.tokens[level - 1] != last)
if (field.tokens.empty())
throw Exception("Logical error in CapnProtoRowInputStream", ErrorCodes::LOGICAL_ERROR);
// Backtrack to common parent
while (field.tokens.size() < parent_tokens.size() + 1
|| !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin()))
{
auto child = getFieldOrThrow(parent.getContainingStruct(), field.tokens[level - 1]);
reader = child.getType().asStruct();
actions.push_back({Action::POP});
actions.push_back({Action::PUSH, child});
}
// Descend to a nested structure
for (; level < field.tokens.size() - 1; ++level)
parents.pop_back();
parent_tokens.pop_back();
if (parents.empty())
{
auto node = getFieldOrThrow(reader, field.tokens[level]);
cur_reader = reader;
break;
}
else
cur_reader = parents.back().getType().asStruct();
}
// Go forward
while (parent_tokens.size() + 1 < field.tokens.size())
{
const auto & token = field.tokens[parents.size()];
auto node = getFieldOrThrow(cur_reader, token);
if (node.getType().isStruct())
{
// Descend to field structure
last = field.tokens[level];
parent = node;
reader = parent.getType().asStruct();
actions.push_back({Action::PUSH, parent});
parents.emplace_back(node);
parent_tokens.emplace_back(token);
cur_reader = node.getType().asStruct();
actions.push_back({Action::PUSH, node});
}
else if (node.getType().isList())
{
break; // Collect list
}
else
throw Exception("Field " + field.tokens[level] + "is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD);
throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD);
}
// Read field from the structure
auto node = getFieldOrThrow(reader, field.tokens[level]);
auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]);
if (node.getType().isList() && actions.size() > 0 && actions.back().field == node)
{
// The field list here flattens Nested elements into multiple arrays
@ -168,7 +187,6 @@ void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields
CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block & header_, const String & schema_dir, const String & schema_file, const String & root_object)
: istr(istr_), header(header_), parser(std::make_shared<SchemaParser>())
{
// Parse the schema and fetch the root object
#pragma GCC diagnostic push
@ -188,14 +206,8 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block
for (size_t i = 0; i < num_columns; ++i)
list.push_back(split(header, i));
// Reorder list to make sure we don't have to backtrack
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b)
{
if (a.tokens.size() == b.tokens.size())
return a.tokens < b.tokens;
return a.tokens.size() < b.tokens.size();
});
// Order the list lexicographically by token paths, so that fields sharing a parent are adjacent and backtracking is minimal.
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; });
createActions(list, root);
}
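To see why the sorted token paths plus the `parents` / `parent_tokens` stacks are sufficient, here is a hypothetical, capnp-free sketch of the same backtracking walk; it just prints the PUSH/POP/READ sequence for a few dotted column names (none of these names come from the commit):
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
int main()
{
    /// Token paths as split() above would produce for columns "a.b.x", "a.b.y", "a.c".
    std::vector<std::vector<std::string>> fields = {{"a", "b", "x"}, {"a", "b", "y"}, {"a", "c"}};
    std::sort(fields.begin(), fields.end());    /// lexicographic order keeps siblings adjacent
    std::vector<std::string> parent_tokens;     /// path of structs currently PUSHed
    for (const auto & tokens : fields)
    {
        /// Backtrack: POP until the stored path is a proper prefix of the current field.
        while (tokens.size() < parent_tokens.size() + 1
               || !std::equal(parent_tokens.begin(), parent_tokens.end(), tokens.begin()))
        {
            std::cout << "POP\n";
            parent_tokens.pop_back();
        }
        /// Descend: PUSH every intermediate struct that is not on the stack yet.
        while (parent_tokens.size() + 1 < tokens.size())
        {
            parent_tokens.push_back(tokens[parent_tokens.size()]);
            std::cout << "PUSH " << parent_tokens.back() << '\n';
        }
        std::cout << "READ " << tokens.back() << '\n';   /// read the leaf field itself
    }
}
For these three columns it prints PUSH a, PUSH b, READ x, READ y, POP, READ c, which mirrors the PUSH/POP structure that createActions builds around the actual capnp field reads.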

View File

@ -1,28 +1,28 @@
#include <Functions/FunctionsStringSearch.h>
#include <memory>
#include <mutex>
#include <Poco/UTF8String.h>
#include <Columns/ColumnFixedString.h>
#include <Common/Volnitsky.h>
#include <Common/config.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <IO/WriteHelpers.h>
#include <Common/config.h>
#include <re2/re2.h>
#include <re2/stringpiece.h>
#include <Poco/UTF8String.h>
#include <Common/Volnitsky.h>
#include <algorithm>
#include <memory>
#if USE_RE2_ST
#include <re2_st/re2.h> // Y_IGNORE
# include <re2_st/re2.h> // Y_IGNORE
#else
#define re2_st re2
# define re2_st re2
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@ -35,7 +35,10 @@ namespace ErrorCodes
struct PositionCaseSensitiveASCII
{
/// For searching a single substring inside a big-enough contiguous chunk of data. Could have slightly expensive initialization.
using SearcherInBigHaystack = VolnitskyImpl<true, true>;
using SearcherInBigHaystack = Volnitsky;
/// For searching many substrings in one string
using MultiSearcherInBigHaystack = MultiVolnitsky;
/// For searching a single substring that is different each time. This object is created for each row of data. It must have cheap initialization.
using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
@ -50,23 +53,24 @@ struct PositionCaseSensitiveASCII
return SearcherInSmallHaystack(needle_data, needle_size);
}
/// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
static size_t countChars(const char * begin, const char * end)
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
{
return end - begin;
return MultiSearcherInBigHaystack(needles);
}
/// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
static size_t countChars(const char * begin, const char * end) { return end - begin; }
/// Convert string to lowercase. Only for case-insensitive search.
/// Implementation is permitted to be inefficient because it is called for a single string.
static void toLowerIfNeed(std::string &)
{
}
static void toLowerIfNeed(std::string &) {}
};
struct PositionCaseInsensitiveASCII
{
/// `Volnitsky` is not used here, because one person measured that this is better. It would be good to re-examine that claim.
using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher;
using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive;
using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher;
static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/)
@ -79,20 +83,20 @@ struct PositionCaseInsensitiveASCII
return SearcherInSmallHaystack(needle_data, needle_size);
}
static size_t countChars(const char * begin, const char * end)
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
{
return end - begin;
return MultiSearcherInBigHaystack(needles);
}
static void toLowerIfNeed(std::string & s)
{
std::transform(std::begin(s), std::end(s), std::begin(s), tolower);
}
static size_t countChars(const char * begin, const char * end) { return end - begin; }
static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
};
struct PositionCaseSensitiveUTF8
{
using SearcherInBigHaystack = VolnitskyImpl<true, false>;
using SearcherInBigHaystack = VolnitskyUTF8;
using MultiSearcherInBigHaystack = MultiVolnitskyUTF8;
using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
@ -105,6 +109,11 @@ struct PositionCaseSensitiveUTF8
return SearcherInSmallHaystack(needle_data, needle_size);
}
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
{
return MultiSearcherInBigHaystack(needles);
}
static size_t countChars(const char * begin, const char * end)
{
size_t res = 0;
@ -114,14 +123,13 @@ struct PositionCaseSensitiveUTF8
return res;
}
static void toLowerIfNeed(std::string &)
{
}
static void toLowerIfNeed(std::string &) {}
};
struct PositionCaseInsensitiveUTF8
{
using SearcherInBigHaystack = VolnitskyImpl<false, false>;
using SearcherInBigHaystack = VolnitskyCaseInsensitiveUTF8;
using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitiveUTF8;
using SearcherInSmallHaystack = UTF8CaseInsensitiveStringSearcher; /// TODO Very suboptimal.
static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
@ -134,6 +142,11 @@ struct PositionCaseInsensitiveUTF8
return SearcherInSmallHaystack(needle_data, needle_size);
}
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
{
return MultiSearcherInBigHaystack(needles);
}
static size_t countChars(const char * begin, const char * end)
{
size_t res = 0;
@ -143,10 +156,7 @@ struct PositionCaseInsensitiveUTF8
return res;
}
static void toLowerIfNeed(std::string & s)
{
Poco::UTF8::toLowerInPlace(s);
}
static void toLowerIfNeed(std::string & s) { Poco::UTF8::toLowerInPlace(s); }
};
template <typename Impl>
@ -155,10 +165,8 @@ struct PositionImpl
using ResultType = UInt64;
/// Find one substring in many strings.
static void vector_constant(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const std::string & needle,
PaddedPODArray<UInt64> & res)
static void vector_constant(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray<UInt64> & res)
{
const UInt8 * begin = data.data();
const UInt8 * pos = begin;
@ -210,7 +218,8 @@ struct PositionImpl
}
/// Each time, search for a different single substring inside a different string.
static void vector_vector(const ColumnString::Chars & haystack_data,
static void vector_vector(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
@ -234,8 +243,8 @@ struct PositionImpl
else
{
/// It is assumed that the StringSearcher is not very difficult to initialize.
typename Impl::SearcherInSmallHaystack searcher
= Impl::createSearcherInSmallHaystack(reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end
/// searcher returns a pointer to the found substring or to the end of `haystack`.
@ -244,7 +253,9 @@ struct PositionImpl
if (pos != haystack_size)
{
res[i] = 1 + Impl::countChars(reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
res[i] = 1
+ Impl::countChars(
reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset + pos]));
}
else
@ -256,8 +267,9 @@ struct PositionImpl
}
}
/// Find many substrings in one line.
static void constant_vector(const String & haystack,
/// Find many substrings in a single string.
static void constant_vector(
const String & haystack,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<UInt64> & res)
@ -281,7 +293,8 @@ struct PositionImpl
typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_offsets[i] - prev_needle_offset - 1);
size_t pos = searcher.search(reinterpret_cast<const UInt8 *>(haystack.data()),
size_t pos = searcher.search(
reinterpret_cast<const UInt8 *>(haystack.data()),
reinterpret_cast<const UInt8 *>(haystack.data()) + haystack.size())
- reinterpret_cast<const UInt8 *>(haystack.data());
@ -298,6 +311,56 @@ struct PositionImpl
}
};
template <typename Impl>
struct MultiPositionImpl
{
using ResultType = UInt64;
static void vector_constant(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const std::vector<StringRef> & needles,
PaddedPODArray<UInt64> & res)
{
auto resCallback = [](const UInt8 * start, const UInt8 * end) -> UInt64
{
return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end));
};
Impl::createMultiSearcherInBigHaystack(needles).searchAll(haystack_data, haystack_offsets, resCallback, res);
}
};
template <typename Impl>
struct MultiSearchImpl
{
using ResultType = UInt64;
static void vector_constant(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const std::vector<StringRef> & needles,
PaddedPODArray<UInt64> & res)
{
Impl::createMultiSearcherInBigHaystack(needles).search(haystack_data, haystack_offsets, res);
}
};
template <typename Impl>
struct FirstMatchImpl
{
using ResultType = UInt64;
static void vector_constant(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const std::vector<StringRef> & needles,
PaddedPODArray<UInt64> & res)
{
Impl::createMultiSearcherInBigHaystack(needles).searchIndex(haystack_data, haystack_offsets, res);
}
};
/// Is the LIKE expression reduced to finding a substring in a string?
inline bool likePatternIsStrstr(const String & pattern, String & res)
@ -348,10 +411,8 @@ struct MatchImpl
{
using ResultType = UInt8;
static void vector_constant(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const std::string & pattern,
PaddedPODArray<UInt8> & res)
static void vector_constant(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray<UInt8> & res)
{
if (offsets.empty())
return;
@ -467,13 +528,14 @@ struct MatchImpl
size_t str_size = (i != 0 ? offsets[i] - offsets[i - 1] : offsets[0]) - 1;
/** Even in the case of `required_substring_is_prefix` use UNANCHORED check for regexp,
* so that it can match when `required_substring` occurs into the line several times,
* so that it can match when `required_substring` occurs in the string several times,
* and at the first occurrence, the regexp is not a match.
*/
if (required_substring_is_prefix)
res[i] = revert
^ regexp->getRE2()->Match(re2_st::StringPiece(str_data, str_size),
^ regexp->getRE2()->Match(
re2_st::StringPiece(str_data, str_size),
reinterpret_cast<const char *>(pos) - str_data,
str_size,
re2_st::RE2::UNANCHORED,
@ -504,13 +566,15 @@ struct MatchImpl
res = revert ^ regexp->match(data);
}
template <typename... Args> static void vector_vector(Args &&...)
template <typename... Args>
static void vector_vector(Args &&...)
{
throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
}
/// Search different needles in single haystack.
template <typename... Args> static void constant_vector(Args &&...)
template <typename... Args>
static void constant_vector(Args &&...)
{
throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
}
@ -519,7 +583,8 @@ struct MatchImpl
struct ExtractImpl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const std::string & pattern,
ColumnString::Chars & res_data,
@ -613,16 +678,17 @@ struct ReplaceRegexpImpl
for (const auto & it : instructions)
if (it.first >= num_captures)
throw Exception("Invalid replace instruction in replacement string. Id: " + toString(it.first) + ", but regexp has only "
+ toString(num_captures - 1)
+ " subpatterns",
throw Exception(
"Invalid replace instruction in replacement string. Id: " + toString(it.first) + ", but regexp has only "
+ toString(num_captures - 1) + " subpatterns",
ErrorCodes::BAD_ARGUMENTS);
return instructions;
}
static void processString(const re2_st::StringPiece & input,
static void processString(
const re2_st::StringPiece & input,
ColumnString::Chars & res_data,
ColumnString::Offset & res_offset,
re2_st::RE2 & searcher,
@ -687,7 +753,8 @@ struct ReplaceRegexpImpl
}
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const std::string & needle,
const std::string & replacement,
@ -715,7 +782,8 @@ struct ReplaceRegexpImpl
}
}
static void vector_fixed(const ColumnString::Chars & data,
static void vector_fixed(
const ColumnString::Chars & data,
size_t n,
const std::string & needle,
const std::string & replacement,
@ -749,7 +817,8 @@ struct ReplaceRegexpImpl
template <bool replace_one = false>
struct ReplaceStringImpl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const std::string & needle,
const std::string & replacement,
@ -791,7 +860,7 @@ struct ReplaceStringImpl
if (i == offsets.size())
break;
/// Is it true that this line no longer needs to perform transformations.
/// Is it true that this string no longer needs to perform transformations.
bool can_finish_current_string = false;
/// We check that the entry does not go through the boundaries of strings.
@ -824,7 +893,8 @@ struct ReplaceStringImpl
/// Note: this function converts fixed-length strings to variable-length strings
/// and each variable-length string should ends with zero byte.
static void vector_fixed(const ColumnString::Chars & data,
static void vector_fixed(
const ColumnString::Chars & data,
size_t n,
const std::string & needle,
const std::string & replacement,
@ -851,7 +921,8 @@ struct ReplaceStringImpl
const UInt8 * match = searcher.search(pos, end - pos);
#define COPY_REST_OF_CURRENT_STRING() \
do { \
do \
{ \
const size_t len = begin + n * (i + 1) - pos; \
res_data.resize(res_data.size() + len + 1); \
memcpy(&res_data[res_offset], pos, len); \
@ -878,7 +949,7 @@ struct ReplaceStringImpl
memcpy(&res_data[res_offset], pos, match - pos);
res_offset += (match - pos);
/// Is it true that this line no longer needs to perform conversions.
/// Is it true that this string no longer needs to perform conversions.
bool can_finish_current_string = false;
/// We check that the entry does not pass through the boundaries of strings.
@ -935,20 +1006,11 @@ class FunctionStringReplace : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionStringReplace>();
}
static FunctionPtr create(const Context &) { return std::make_shared<FunctionStringReplace>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override
{
return 3;
}
size_t getNumberOfArguments() const override { return 3; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
@ -956,15 +1018,18 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isStringOrFixedString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
throw Exception(
"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isStringOrFixedString(arguments[1]))
throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
throw Exception(
"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isStringOrFixedString(arguments[2]))
throw Exception("Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
throw Exception(
"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
@ -1025,6 +1090,54 @@ struct NamePositionCaseInsensitiveUTF8
{
static constexpr auto name = "positionCaseInsensitiveUTF8";
};
struct NameMultiPosition
{
static constexpr auto name = "multiPosition";
};
struct NameMultiPositionUTF8
{
static constexpr auto name = "multiPositionUTF8";
};
struct NameMultiPositionCaseInsensitive
{
static constexpr auto name = "multiPositionCaseInsensitive";
};
struct NameMultiPositionCaseInsensitiveUTF8
{
static constexpr auto name = "multiPositionCaseInsensitiveUTF8";
};
struct NameMultiSearch
{
static constexpr auto name = "multiSearch";
};
struct NameMultiSearchUTF8
{
static constexpr auto name = "multiSearchUTF8";
};
struct NameMultiSearchCaseInsensitive
{
static constexpr auto name = "multiSearchCaseInsensitive";
};
struct NameMultiSearchCaseInsensitiveUTF8
{
static constexpr auto name = "multiSearchCaseInsensitiveUTF8";
};
struct NameFirstMatch
{
static constexpr auto name = "firstMatch";
};
struct NameFirstMatchUTF8
{
static constexpr auto name = "firstMatchUTF8";
};
struct NameFirstMatchCaseInsensitive
{
static constexpr auto name = "firstMatchCaseInsensitive";
};
struct NameFirstMatchCaseInsensitiveUTF8
{
static constexpr auto name = "firstMatchCaseInsensitiveUTF8";
};
struct NameMatch
{
static constexpr auto name = "match";
@ -1064,6 +1177,27 @@ using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<Posit
using FunctionPositionCaseInsensitiveUTF8
= FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
using FunctionMultiPosition = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseSensitiveASCII>, NameMultiPosition>;
using FunctionMultiPositionUTF8 = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseSensitiveUTF8>, NameMultiPositionUTF8>;
using FunctionMultiPositionCaseInsensitive
= FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseInsensitiveASCII>, NameMultiPositionCaseInsensitive>;
using FunctionMultiPositionCaseInsensitiveUTF8
= FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseInsensitiveUTF8>, NameMultiPositionCaseInsensitiveUTF8>;
using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearch>;
using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchUTF8>;
using FunctionMultiSearchCaseInsensitive
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchCaseInsensitive>;
using FunctionMultiSearchCaseInsensitiveUTF8
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchCaseInsensitiveUTF8>;
using FunctionFirstMatch = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseSensitiveASCII>, NameFirstMatch>;
using FunctionFirstMatchUTF8 = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseSensitiveUTF8>, NameFirstMatchUTF8>;
using FunctionFirstMatchCaseInsensitive
= FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseInsensitiveASCII>, NameFirstMatchCaseInsensitive>;
using FunctionFirstMatchCaseInsensitiveUTF8
= FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseInsensitiveUTF8>, NameFirstMatchCaseInsensitiveUTF8>;
using FunctionMatch = FunctionsStringSearch<MatchImpl<false>, NameMatch>;
using FunctionLike = FunctionsStringSearch<MatchImpl<true>, NameLike>;
using FunctionNotLike = FunctionsStringSearch<MatchImpl<true, true>, NameNotLike>;
@ -1080,14 +1214,32 @@ void registerFunctionsStringSearch(FunctionFactory & factory)
factory.registerFunction<FunctionReplaceAll>();
factory.registerFunction<FunctionReplaceRegexpOne>();
factory.registerFunction<FunctionReplaceRegexpAll>();
factory.registerFunction<FunctionPosition>(FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionPositionUTF8>();
factory.registerFunction<FunctionPositionCaseInsensitive>();
factory.registerFunction<FunctionPositionCaseInsensitiveUTF8>();
factory.registerFunction<FunctionMultiPosition>();
factory.registerFunction<FunctionMultiPositionUTF8>();
factory.registerFunction<FunctionMultiPositionCaseInsensitive>();
factory.registerFunction<FunctionMultiPositionCaseInsensitiveUTF8>();
factory.registerFunction<FunctionMultiSearch>();
factory.registerFunction<FunctionMultiSearchUTF8>();
factory.registerFunction<FunctionMultiSearchCaseInsensitive>();
factory.registerFunction<FunctionMultiSearchCaseInsensitiveUTF8>();
factory.registerFunction<FunctionFirstMatch>();
factory.registerFunction<FunctionFirstMatchUTF8>();
factory.registerFunction<FunctionFirstMatchCaseInsensitive>();
factory.registerFunction<FunctionFirstMatchCaseInsensitiveUTF8>();
factory.registerFunction<FunctionMatch>();
factory.registerFunction<FunctionLike>();
factory.registerFunction<FunctionNotLike>();
factory.registerFunction<FunctionExtract>();
factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive);
}

View File

@ -1,17 +1,20 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Core/Field.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>
#include <common/StringRef.h>
namespace DB
{
/** Search and replace functions in strings:
*
* position(haystack, needle) - the normal search for a substring in a string, returns the position (in bytes) of the found substring starting with 1, or 0 if no substring is found.
@ -35,12 +38,28 @@ namespace DB
*
* replaceRegexpOne(haystack, pattern, replacement) - replaces the pattern with the specified regexp, only the first occurrence.
* replaceRegexpAll(haystack, pattern, replacement) - replaces the pattern with the specified type, all occurrences.
*
* multiPosition(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- find the first occurrences (positions) of all the const patterns inside haystack
* multiPositionUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
* multiPositionCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
* multiPositionCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
*
* multiSearch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- find any of the const patterns inside haystack and return 0 or 1
* multiSearchUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
* multiSearchCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
* multiSearchCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
*
* firstMatch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- returns the 1-based index of the first matched pattern, or zero if none of the patterns matched
* firstMatchUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
* firstMatchCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
* firstMatchCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
*/
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename Impl, typename Name>
@ -48,20 +67,11 @@ class FunctionsStringSearch : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionsStringSearch>();
}
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringSearch>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override
{
return 2;
}
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -90,7 +100,8 @@ public:
{
ResultType res{};
Impl::constant_constant(col_haystack_const->getValue<String>(), col_needle_const->getValue<String>(), res);
block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res));
block.getByPosition(result).column
= block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res));
return;
}
@ -103,20 +114,22 @@ public:
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
if (col_haystack_vector && col_needle_vector)
Impl::vector_vector(col_haystack_vector->getChars(),
Impl::vector_vector(
col_haystack_vector->getChars(),
col_haystack_vector->getOffsets(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
vec_res);
else if (col_haystack_vector && col_needle_const)
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue<String>(), vec_res);
Impl::vector_constant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue<String>(), vec_res);
else if (col_haystack_const && col_needle_vector)
Impl::constant_vector(col_haystack_const->getValue<String>(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
Impl::constant_vector(
col_haystack_const->getValue<String>(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
else
throw Exception("Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and "
+ block.getByPosition(arguments[1]).column->getName()
+ " of arguments of function "
+ getName(),
throw Exception(
"Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and "
+ block.getByPosition(arguments[1]).column->getName() + " of arguments of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
block.getByPosition(result).column = std::move(col_res);
@ -129,20 +142,11 @@ class FunctionsStringSearchToString : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionsStringSearchToString>();
}
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringSearchToString>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override
{
return 2;
}
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
@ -186,4 +190,156 @@ public:
}
};
template <typename Impl, typename Name>
class FunctionsMultiStringPosition : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringPosition>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() + 1 >= std::numeric_limits<UInt8>::max())
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size())
+ ", should be at most 255.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!isString(arguments[0]))
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get()))
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
using ResultType = typename Impl::ResultType;
const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column;
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
const ColumnPtr & arr_ptr = block.getByPosition(arguments[1]).column;
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
if (!col_const_arr)
throw Exception(
"Illegal column " + block.getByPosition(arguments[1]).column->getName() + ". The array is not const",
ErrorCodes::ILLEGAL_COLUMN);
Array src_arr = col_const_arr->getValue<Array>();
std::vector<StringRef> refs;
for (const auto & el : src_arr)
refs.emplace_back(el.get<String>());
const size_t column_haystack_size = column_haystack->size();
auto col_res = ColumnVector<ResultType>::create();
auto col_offsets = ColumnArray::ColumnOffsets::create(column_haystack_size);
auto & vec_res = col_res->getData();
auto & offsets_res = col_offsets->getData();
vec_res.resize(column_haystack_size * refs.size());
if (col_haystack_vector)
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN);
size_t refs_size = refs.size();
size_t accum = refs_size;
for (size_t i = 0; i < column_haystack_size; ++i, accum += refs_size)
offsets_res[i] = accum;
block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets));
}
};
template <typename Impl, typename Name>
class FunctionsMultiStringSearch : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringSearch>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() + 1 >= std::numeric_limits<UInt8>::max())
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size())
+ ", should be at most 255.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!isString(arguments[0]))
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get()))
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<typename Impl::ResultType>>();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
using ResultType = typename Impl::ResultType;
const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column;
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
const ColumnPtr & arr_ptr = block.getByPosition(arguments[1]).column;
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
if (!col_const_arr)
throw Exception(
"Illegal column " + block.getByPosition(arguments[1]).column->getName() + ". The array is not const",
ErrorCodes::ILLEGAL_COLUMN);
Array src_arr = col_const_arr->getValue<Array>();
std::vector<StringRef> refs;
refs.reserve(src_arr.size());
for (const auto & el : src_arr)
refs.emplace_back(el.get<String>());
const size_t column_haystack_size = column_haystack->size();
auto col_res = ColumnVector<ResultType>::create();
auto & vec_res = col_res->getData();
vec_res.resize(column_haystack_size);
if (col_haystack_vector)
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN);
block.getByPosition(result).column = std::move(col_res);
}
};
}

View File

@ -446,13 +446,11 @@ void ActionsVisitor::visit(const ASTPtr & ast)
for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
{
ASTIdentifier * lambda_identifier = typeid_cast<ASTIdentifier *>(lambda_arg_asts[j].get());
if (!lambda_identifier)
auto opt_arg_name = getIdentifierName(lambda_arg_asts[j]);
if (!opt_arg_name)
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
String arg_name = lambda_identifier->name;
lambda_arguments.emplace_back(arg_name, lambda_type->getArgumentTypes()[j]);
lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
}
actions_stack.pushLevel(lambda_arguments);
@ -541,9 +539,6 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(arg.get());
if (typeid_cast<const ASTSubquery *>(arg.get()) || identifier)
{
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
String set_id = arg->getColumnName();
/// A special case is if the name of the table is specified on the right side of the IN statement,
/// and the table has the type Set (a previously prepared set).
if (identifier)
@ -563,6 +558,9 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
}
}
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
String set_id = arg->getColumnName();
SubqueryForSet & subquery_for_set = subqueries_for_sets[set_id];
/// If you already created a Set with the same subquery / table.

View File

@ -10,6 +10,7 @@
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/DumpASTNode.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
@ -90,19 +91,15 @@ private:
void visit(ASTTableExpression & table_expression, ASTPtr &) const
{
if (table_expression.database_and_table_name)
{
tryVisit<ASTIdentifier>(table_expression.database_and_table_name);
if (table_expression.database_and_table_name->children.size() != 2)
throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR);
}
else if (table_expression.subquery)
tryVisit<ASTSubquery>(table_expression.subquery);
}
/// @note It expects that only table (not column) identifiers are visited.
void visit(const ASTIdentifier & identifier, ASTPtr & ast) const
{
if (ast->children.empty())
if (identifier.name_parts.empty())
ast = createTableIdentifier(database_name, identifier.name);
}

View File

@ -67,12 +67,13 @@ Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, cons
}
Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port)
Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_)
: user(user_), password(password_)
{
auto parsed_host_port = parseAddress(host_port_, clickhouse_port);
host_name = parsed_host_port.first;
port = parsed_host_port.second;
secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
initially_resolved_address = DNSResolver::instance().resolveAddress(parsed_host_port.first, parsed_host_port.second);
is_local = isLocal(*this, initially_resolved_address, clickhouse_port);
@ -319,7 +320,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting
Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String>> & names,
const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote)
const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote, bool secure)
{
UInt32 current_shard_num = 1;
@ -327,7 +328,7 @@ Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String
{
Addresses current;
for (auto & replica : shard)
current.emplace_back(replica, username, password, clickhouse_port);
current.emplace_back(replica, username, password, clickhouse_port, secure);
addresses_with_failover.emplace_back(current);

View File

@ -24,7 +24,7 @@ public:
/// This parameter is needed only to check that some address is local (points to ourselves).
Cluster(const Settings & settings, const std::vector<std::vector<String>> & names,
const String & username, const String & password,
UInt16 clickhouse_port, bool treat_local_as_remote);
UInt16 clickhouse_port, bool treat_local_as_remote, bool secure = false);
Cluster(const Cluster &) = delete;
Cluster & operator=(const Cluster &) = delete;
@ -69,7 +69,7 @@ public:
Address() = default;
Address(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port);
Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_ = false);
/// Returns 'escaped_host_name:port'
String toString() const;

View File

@ -13,7 +13,7 @@ namespace DB
/// Checks that ast is an ASTIdentifier and removes num_qualifiers_to_strip components from the left.
/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'.
void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
void stripIdentifier(const DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
{
ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(ast.get());
@ -22,29 +22,15 @@ void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
if (num_qualifiers_to_strip)
{
size_t num_components = identifier->children.size();
/// plain column
if (num_components - num_qualifiers_to_strip == 1)
{
DB::String node_alias = identifier->tryGetAlias();
ast = identifier->children.back();
if (!node_alias.empty())
ast->setAlias(node_alias);
}
else
/// nested column
{
identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip);
identifier->name_parts.erase(identifier->name_parts.begin(), identifier->name_parts.begin() + num_qualifiers_to_strip);
DB::String new_name;
for (const auto & child : identifier->children)
for (const auto & part : identifier->name_parts)
{
if (!new_name.empty())
new_name += '.';
new_name += static_cast<const ASTIdentifier &>(*child.get()).name;
}
identifier->name = new_name;
new_name += part;
}
identifier->name.swap(new_name);
}
}
@ -52,32 +38,16 @@ void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
const DatabaseAndTableWithAlias & names)
{
size_t num_qualifiers_to_strip = 0;
/// It is compound identifier
if (!identifier.children.empty())
{
size_t num_components = identifier.children.size();
/// database.table.column
if (num_components >= 3
&& !names.database.empty()
&& *getIdentifierName(identifier.children[0]) == names.database
&& *getIdentifierName(identifier.children[1]) == names.table)
{
num_qualifiers_to_strip = 2;
}
if (doesIdentifierBelongTo(identifier, names.database, names.table))
return 2;
/// table.column or alias.column. If num_components > 2, it is like table.nested.column.
if (num_components >= 2
&& ((!names.table.empty() && *getIdentifierName(identifier.children[0]) == names.table)
|| (!names.alias.empty() && *getIdentifierName(identifier.children[0]) == names.alias)))
{
num_qualifiers_to_strip = 1;
}
}
/// table.column or alias.column.
if (doesIdentifierBelongTo(identifier, names.table) ||
doesIdentifierBelongTo(identifier, names.alias))
return 1;
return num_qualifiers_to_strip;
return 0;
}
@ -87,13 +57,13 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident
table = identifier.name;
alias = identifier.tryGetAlias();
if (!identifier.children.empty())
if (!identifier.name_parts.empty())
{
if (identifier.children.size() != 2)
throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR);
if (identifier.name_parts.size() != 2)
throw Exception("Logical error: 2 components expected in table expression '" + identifier.name + "'", ErrorCodes::LOGICAL_ERROR);
getIdentifierName(identifier.children[0], database);
getIdentifierName(identifier.children[1], table);
database = identifier.name_parts[0];
table = identifier.name_parts[1];
}
}
@ -118,6 +88,22 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression &
throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR);
}
bool DatabaseAndTableWithAlias::satisfies(const DatabaseAndTableWithAlias & db_table, bool table_may_be_an_alias)
{
/// table.*, alias.* or database.table.*
if (database.empty())
{
if (!db_table.table.empty() && table == db_table.table)
return true;
if (!db_table.alias.empty())
return (alias == db_table.alias) || (table_may_be_an_alias && table == db_table.alias);
}
return database == db_table.database && table == db_table.table;
}
String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const
{
if (alias.empty() && table.empty())
@ -133,17 +119,7 @@ void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const
String prefix = getQualifiedNamePrefix();
identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end());
Names qualifiers;
if (!alias.empty())
qualifiers.push_back(alias);
else
{
qualifiers.push_back(database);
qualifiers.push_back(table);
}
for (const auto & qualifier : qualifiers)
identifier->children.emplace_back(std::make_shared<ASTIdentifier>(qualifier));
addIdentifierQualifier(*identifier, database, table, alias);
}
}
@ -209,21 +185,13 @@ std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuer
return DatabaseAndTableWithAlias(database_and_table_name);
}
ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number)
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number)
{
const ASTTableExpression * table_expression = getTableExpression(select, table_number);
if (table_expression)
if (const ASTTableExpression * table_expression = getTableExpression(select, table_number))
{
#if 1 /// TODO: It hides some logical error in InterpreterSelectQuery & distributed tables
if (table_expression->database_and_table_name)
{
if (table_expression->database_and_table_name->children.empty())
return table_expression->database_and_table_name;
if (table_expression->database_and_table_name->children.size() == 2)
return table_expression->database_and_table_name->children[1];
}
#endif
if (table_expression->table_function)
return table_expression->table_function;

View File

@ -2,8 +2,9 @@
#include <memory>
#include <optional>
#include <Core/Types.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
@ -33,9 +34,12 @@ struct DatabaseAndTableWithAlias
/// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to its name.
void makeQualifiedName(const ASTPtr & ast) const;
/// Check if it satisfies another db_table name. @note the operation is not symmetric.
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
};
void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
void stripIdentifier(const DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
const DatabaseAndTableWithAlias & names);
@ -44,6 +48,6 @@ std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
std::vector<const ASTTableExpression *> getSelectTablesExpression(const ASTSelectQuery & select_query);
ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number);
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
}

View File

@ -1,6 +1,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTExpressionList.h>

View File

@ -310,7 +310,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block &
if (!prepared_sets.count(arg->range)) /// Not already prepared.
{
if (typeid_cast<ASTSubquery *>(arg.get()) || typeid_cast<ASTIdentifier *>(arg.get()))
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))
{
if (settings.use_index_for_in_with_subqueries)
tryMakeSetForIndexFromSubquery(arg);

View File

@ -55,7 +55,7 @@ public:
ASTPtr table_name;
ASTPtr subquery_or_table_name;
if (typeid_cast<const ASTIdentifier *>(subquery_or_table_name_or_table_expression.get()))
if (isIdentifier(subquery_or_table_name_or_table_expression))
{
table_name = subquery_or_table_name_or_table_expression;
subquery_or_table_name = table_name;
@ -86,7 +86,7 @@ public:
if (table_name)
{
/// If this is already an external table, you do not need to add anything. Just remember its presence.
if (external_tables.end() != external_tables.find(static_cast<const ASTIdentifier &>(*table_name).name))
if (external_tables.end() != external_tables.find(*getIdentifierName(table_name)))
return;
}

View File

@ -86,20 +86,17 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
String database_name;
String table_name;
auto identifier = table_expression->database_and_table_name;
if (identifier->children.size() > 2)
auto identifier = typeid_cast<const ASTIdentifier *>(table_expression->database_and_table_name.get());
if (identifier->name_parts.size() > 2)
throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR);
if (identifier->children.size() > 1)
if (identifier->name_parts.size() > 1)
{
auto database_ptr = identifier->children[0];
auto table_ptr = identifier->children[1];
getIdentifierName(database_ptr, database_name);
getIdentifierName(table_ptr, table_name);
database_name = identifier->name_parts[0];
table_name = identifier->name_parts[1];
}
else
getIdentifierName(identifier, table_name);
table_name = identifier->name;
table = context.getTable(database_name, table_name);
}

View File

@ -147,14 +147,22 @@ InterpreterSelectQuery::InterpreterSelectQuery(
max_streams = settings.max_threads;
ASTPtr table_expression = getTableFunctionOrSubquery(query, 0);
ASTPtr table_expression = extractTableExpression(query, 0);
bool is_table_func = false;
bool is_subquery = false;
if (table_expression)
{
is_table_func = typeid_cast<const ASTFunction *>(table_expression.get());
is_subquery = typeid_cast<const ASTSelectWithUnionQuery *>(table_expression.get());
}
if (input)
{
/// Read from prepared input.
source_header = input->getHeader();
}
else if (table_expression && typeid_cast<const ASTSelectWithUnionQuery *>(table_expression.get()))
else if (is_subquery)
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
@ -164,7 +172,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
}
else if (!storage)
{
if (table_expression && typeid_cast<const ASTFunction *>(table_expression.get()))
if (is_table_func)
{
/// Read from table function.
storage = context.getQueryContext().executeTableFunction(table_expression);
@ -208,7 +216,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (query_analyzer->isRewriteSubqueriesPredicate())
{
/// Remake interpreter_subquery when PredicateOptimizer rewrites subqueries and the main table is a subquery
if (table_expression && typeid_cast<ASTSelectWithUnionQuery *>(table_expression.get()))
if (is_subquery)
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1,
only_analyze);
@ -921,7 +929,7 @@ void InterpreterSelectQuery::executeFetchColumns(
/// If we need fewer columns than the subquery has, update the interpreter.
if (required_columns.size() < source_header.columns())
{
ASTPtr subquery = getTableFunctionOrSubquery(query, 0);
ASTPtr subquery = extractTableExpression(query, 0);
if (!subquery)
throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR);
@ -1396,7 +1404,7 @@ bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
* In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end.
*/
if (auto query_table = getTableFunctionOrSubquery(query, 0))
if (auto query_table = extractTableExpression(query, 0))
{
if (auto ast_union = typeid_cast<const ASTSelectWithUnionQuery *>(query_table.get()))
{

View File

@ -1,6 +1,5 @@
#include <Common/typeid_cast.h>
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
#include <Interpreters/SemanticSelectQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
@ -19,6 +18,7 @@ namespace ErrorCodes
extern const int TOO_DEEP_AST;
}
#if 0
/// Attach additional semantic info to generated select.
struct AppendSemanticVisitorData
{
@ -35,6 +35,7 @@ struct AppendSemanticVisitorData
done = true;
}
};
#endif
/// Replaces one table element with a pair.
struct RewriteTablesVisitorData
@ -124,7 +125,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
if (!left)
return;
SemanticSelectQuery::hideNames(select, hidden_names, subquery_name);
//SemanticSelectQuery::hideNames(select, hidden_names, subquery_name);
}
select.tables = std::make_shared<ASTTablesInSelectQuery>();
@ -135,11 +136,15 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
data.done = true;
}
ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTSelectQuery & select, ASTPtr ast_left, ASTPtr ast_right, const String & subquery_alias)
ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTSelectQuery &, ASTPtr ast_left, ASTPtr ast_right, const String & subquery_alias)
{
#if 0
using RewriteMatcher = LinkedMatcher<
OneTypeMatcher<RewriteTablesVisitorData>,
OneTypeMatcher<AppendSemanticVisitorData>>;
#else
using RewriteMatcher = OneTypeMatcher<RewriteTablesVisitorData>;
#endif
using RewriteVisitor = InDepthNodeVisitor<RewriteMatcher, true>;
auto left = typeid_cast<const ASTTablesInSelectQueryElement *>(ast_left.get());
@ -160,8 +165,12 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTSelectQuery & select, ASTP
if (!res)
throw Exception("Cannot parse rewrite query", ErrorCodes::LOGICAL_ERROR);
#if 0
RewriteVisitor::Data visitor_data =
std::make_pair<RewriteTablesVisitorData, AppendSemanticVisitorData>({ast_left, ast_right}, {select.semantic});
#else
RewriteVisitor::Data visitor_data{ast_left, ast_right};
#endif
RewriteVisitor(visitor_data).visit(res);
return res;
}

View File

@ -133,8 +133,12 @@ void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(cons
{
if (const auto identifier = typeid_cast<ASTIdentifier *>(expression.get()))
{
if (!identifier->children.empty())
dependencies_and_qualified.emplace_back(std::pair(identifier, expression->getAliasOrColumnName()));
String table_alias;
if (!identifier->name_parts.empty())
{
if (!tables_with_aliases.empty())
table_alias = tables_with_aliases[0].getQualifiedNamePrefix();
}
else
{
size_t best_table_pos = 0;
@ -153,9 +157,11 @@ void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(cons
}
}
String qualified_name = tables_with_aliases[best_table_pos].getQualifiedNamePrefix() + expression->getAliasOrColumnName();
dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name));
table_alias = tables_with_aliases[best_table_pos].getQualifiedNamePrefix();
}
String qualified_name = table_alias + expression->getAliasOrColumnName();
dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name));
}
else
{
@ -356,31 +362,17 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que
if (qualified_asterisk->children.size() != 1)
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
ASTIdentifier * ident = typeid_cast<ASTIdentifier *>(qualified_asterisk->children[0].get());
if (!ident)
throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR);
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
size_t num_components = ident->children.size();
if (num_components > 2)
throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
for (auto it = tables_expression.begin(); it != tables_expression.end(); ++it)
for (auto it = tables_expression.begin(); it != tables_expression.end();)
{
const ASTTableExpression * table_expression = *it;
DatabaseAndTableWithAlias database_and_table_with_alias(*table_expression, context.getCurrentDatabase());
/// database.table.*
if (num_components == 2 && !database_and_table_with_alias.database.empty()
&& static_cast<const ASTIdentifier &>(*ident->children[0]).name == database_and_table_with_alias.database
&& static_cast<const ASTIdentifier &>(*ident->children[1]).name == database_and_table_with_alias.table)
continue;
/// table.* or alias.*
else if (num_components == 0
&& ((!database_and_table_with_alias.table.empty() && ident->name == database_and_table_with_alias.table)
|| (!database_and_table_with_alias.alias.empty() && ident->name == database_and_table_with_alias.alias)))
continue;
if (ident_db_and_name.satisfies(database_and_table_with_alias, true))
++it;
else
/// It's not a required table
tables_expression.erase(it);
it = tables_expression.erase(it); /// It's not a required table
}
}

View File

@ -168,18 +168,11 @@ void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & dat
}
else if (const auto * qualified_asterisk = typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
{
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(qualified_asterisk->children[0].get());
size_t num_components = identifier->children.size();
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
for (const auto & [table_name, table_columns] : tables_with_columns)
{
if ((num_components == 2 /// database.table.*
&& !table_name.database.empty() /// This is normal (not a temporary) table.
&& static_cast<const ASTIdentifier &>(*identifier->children[0]).name == table_name.database
&& static_cast<const ASTIdentifier &>(*identifier->children[1]).name == table_name.table)
|| (num_components == 0 /// t.*
&& ((!table_name.table.empty() && identifier->name == table_name.table) /// table.*
|| (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.*
if (ident_db_and_name.satisfies(table_name, true))
{
for (const auto & column_name : table_columns)
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));

View File

@ -1,43 +0,0 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
/// Additional information for ASTSelectQuery
class SemanticSelectQuery : public ISemantic
{
public:
SemanticPtr clone() const override { return std::make_shared<SemanticSelectQuery>(*this); }
std::vector<String> getPossibleNames(const String & name) const
{
std::vector<String> res;
res.push_back(name);
for (auto it = hidings.find(name); it != hidings.end(); it = hidings.find(it->second))
res.push_back(it->second);
return res;
}
static void hideNames(ASTSelectQuery & select, const std::vector<String> & hidden, const String & new_name)
{
if (!select.semantic)
select.semantic = std::make_shared<SemanticSelectQuery>();
auto & sema = static_cast<SemanticSelectQuery &>(*select.semantic);
sema.hideNames(hidden, new_name);
}
private:
std::unordered_map<String, String> hidings;
void hideNames(const std::vector<String> & hidden, const String & new_name)
{
for (auto & name : hidden)
hidings.emplace(name, new_name);
}
};
}

View File

@ -89,38 +89,17 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTIdentifier
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data)
{
const std::vector<DatabaseAndTableWithAlias> & tables = data.tables;
if (ast->children.size() != 1)
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
ASTIdentifier * ident = typeid_cast<ASTIdentifier *>(ast->children[0].get());
if (!ident)
throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR);
auto & ident = ast->children[0];
size_t num_components = ident->children.size();
if (num_components > 2)
throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
/// @note it could contain a table alias as the table name.
DatabaseAndTableWithAlias db_and_table(ident);
DatabaseAndTableWithAlias db_and_table(*ident);
for (const auto & table_names : tables)
{
/// database.table.*, table.* or alias.*
if (num_components == 2)
{
if (!table_names.database.empty() &&
db_and_table.database == table_names.database &&
db_and_table.table == table_names.table)
for (const auto & known_table : data.tables)
if (db_and_table.satisfies(known_table, true))
return {};
}
else if (num_components == 0)
{
if ((!table_names.table.empty() && db_and_table.table == table_names.table) ||
(!table_names.alias.empty() && db_and_table.table == table_names.alias))
return {};
}
}
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
}

View File

@ -2,6 +2,7 @@
#include <vector>
#include <Core/Names.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/InDepthNodeVisitor.h>

View File

@ -18,14 +18,14 @@ void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, Form
/// A simple or compound identifier?
if (children.size() > 1)
if (name_parts.size() > 1)
{
for (size_t i = 0, size = children.size(); i < size; ++i)
for (size_t i = 0, size = name_parts.size(); i < size; ++i)
{
if (i != 0)
settings.ostr << '.';
format_element(static_cast<const ASTIdentifier &>(*children[i].get()).name);
format_element(name_parts[i]);
}
}
else
@ -44,11 +44,7 @@ ASTPtr createTableIdentifier(const String & database_name, const String & table_
if (database_name.empty())
return ASTIdentifier::createSpecial(table_name);
ASTPtr database = ASTIdentifier::createSpecial(database_name);
ASTPtr table = ASTIdentifier::createSpecial(table_name);
ASTPtr database_and_table = ASTIdentifier::createSpecial(database_name + "." + table_name);
database_and_table->children = {database, table};
ASTPtr database_and_table = ASTIdentifier::createSpecial(database_name + "." + table_name, {database_name, table_name});
return database_and_table;
}
@ -117,4 +113,35 @@ void setIdentifierSpecial(ASTPtr & ast)
id->setSpecial();
}
void addIdentifierQualifier(ASTIdentifier & identifier, const String & database, const String & table, const String & alias)
{
if (!alias.empty())
{
identifier.name_parts.emplace_back(alias);
}
else
{
if (!database.empty())
identifier.name_parts.emplace_back(database);
identifier.name_parts.emplace_back(table);
}
}
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table)
{
size_t num_components = identifier.name_parts.size();
if (num_components >= 3)
return identifier.name_parts[0] == database &&
identifier.name_parts[1] == table;
return false;
}
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table)
{
size_t num_components = identifier.name_parts.size();
if (num_components >= 2)
return identifier.name_parts[0] == table;
return false;
}
}

View File

@ -12,11 +12,14 @@ namespace DB
class ASTIdentifier : public ASTWithAlias
{
public:
/// name. The composite identifier here will have a concatenated name (of the form a.b.c), and individual components will be available inside the children.
/// The composite identifier will have a concatenated name (of the form a.b.c),
/// and individual components will be available inside the name_parts.
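/// For example (illustrative): for "db.table", name is "db.table" and name_parts is {"db", "table"};
/// for a simple identifier the name_parts vector stays empty.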
String name;
std::vector<String> name_parts;
ASTIdentifier(const String & name_)
ASTIdentifier(const String & name_, std::vector<String> && name_parts_ = {})
: name(name_)
, name_parts(name_parts_)
, special(false)
{
range = StringRange(name.data(), name.data() + name.size());
@ -37,11 +40,13 @@ protected:
void appendColumnNameImpl(WriteBuffer & ostr) const override;
private:
using ASTWithAlias::children; /// ASTIdentifier is child free
bool special; /// TODO: it would be ptr to semantic here
static std::shared_ptr<ASTIdentifier> createSpecial(const String & name_)
static std::shared_ptr<ASTIdentifier> createSpecial(const String & name, std::vector<String> && name_parts = {})
{
auto ret = std::make_shared<ASTIdentifier>(name_);
auto ret = std::make_shared<ASTIdentifier>(name, std::move(name_parts));
ret->special = true;
return ret;
}
@ -77,5 +82,8 @@ std::optional<String> getTableIdentifierName(const ASTIdentifier & node);
std::optional<String> getTableIdentifierName(const ASTPtr & ast);
void setIdentifierSpecial(ASTPtr & ast);
void addIdentifierQualifier(ASTIdentifier & identifier, const String & database, const String & table, const String & alias);
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table_or_alias);
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
}

View File

@ -51,8 +51,6 @@ ASTPtr ASTSelectQuery::clone() const
#undef CLONE
if (semantic)
res->semantic = semantic->clone();
return res;
}

View File

@ -169,19 +169,19 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
return false;
String name;
std::vector<String> parts;
const ASTExpressionList & list = static_cast<const ASTExpressionList &>(*id_list.get());
for (const auto & child : list.children)
{
if (!name.empty())
name += '.';
name += *getIdentifierName(child);
parts.emplace_back(*getIdentifierName(child));
name += parts.back();
}
node = std::make_shared<ASTIdentifier>(name);
/// In `children`, remember the identifiers-components, if there are more than one.
if (list.children.size() > 1)
node->children.insert(node->children.end(), list.children.begin(), list.children.end());
if (parts.size() == 1)
parts.clear();
node = std::make_shared<ASTIdentifier>(name, std::move(parts));
return true;
}

View File

@ -31,20 +31,6 @@ class IAST;
using ASTPtr = std::shared_ptr<IAST>;
using ASTs = std::vector<ASTPtr>;
class ISemantic;
using SemanticPtr = std::shared_ptr<ISemantic>;
/// Interface to set additional information to IAST. Derived classes should be named according to their AST nodes' types:
/// ASTIdentifier => SemanticIdentifer, ASTSome => SemanticSome, ...
class ISemantic
{
public:
virtual ~ISemantic() = default;
ISemantic() = default;
ISemantic(const ISemantic &) = default;
virtual SemanticPtr clone() const = 0;
};
class WriteBuffer;
@ -58,7 +44,6 @@ public:
/// This pointer does not allow it to be deleted while the range refers to it.
StringPtr owned_string;
SemanticPtr semantic;
virtual ~IAST() = default;
IAST() = default;

View File

@ -219,6 +219,8 @@ public:
/// If commit() was not called, deletes temporary files, canceling the ALTER.
~AlterDataPartTransaction();
const String & getPartName() const { return data_part->name; }
/// Review the changes before the commit.
const NamesAndTypesList & getNewColumns() const { return new_columns; }
const DataPart::Checksums & getNewChecksums() const { return new_checksums; }
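/// An illustrative (hedged) flow: inspect getNewColumns() / getNewChecksums() on the transaction,
/// then either call commit() to apply the file changes or let the destructor cancel the ALTER.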

View File

@ -312,7 +312,11 @@ String MergeTreeDataPartChecksums::getTotalChecksumHex() const
void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const
{
writeString("checksums format version: 5\n", to);
serializeWithoutHeader(to);
}
void MinimalisticDataPartChecksums::serializeWithoutHeader(WriteBuffer & to) const
{
writeVarUInt(num_compressed_files, to);
writeVarUInt(num_uncompressed_files, to);
@ -337,26 +341,31 @@ bool MinimalisticDataPartChecksums::deserialize(ReadBuffer & in)
if (format_version < MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS)
{
auto full_checksums_ptr = std::make_unique<MergeTreeDataPartChecksums>();
if (!full_checksums_ptr->read(in, format_version))
MergeTreeDataPartChecksums new_full_checksums;
if (!new_full_checksums.read(in, format_version))
return false;
computeTotalChecksums(*full_checksums_ptr);
full_checksums = std::move(full_checksums_ptr);
computeTotalChecksums(new_full_checksums);
full_checksums = std::move(new_full_checksums);
return true;
}
if (format_version > MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS)
throw Exception("Unknown checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT);
deserializeWithoutHeader(in);
return true;
}
void MinimalisticDataPartChecksums::deserializeWithoutHeader(ReadBuffer & in)
{
readVarUInt(num_compressed_files, in);
readVarUInt(num_uncompressed_files, in);
readPODBinary(hash_of_all_files, in);
readPODBinary(hash_of_uncompressed_files, in);
readPODBinary(uncompressed_hash_of_compressed_files, in);
return true;
}
void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums_)
@ -410,7 +419,7 @@ String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPar
return checksums.getSerializedString();
}
void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files)
void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
{
if (full_checksums && rhs.full_checksums)
full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files);
@ -419,7 +428,7 @@ void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksu
checkEqualImpl(rhs, check_uncompressed_hash_in_compressed_files);
}
void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files)
void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
{
if (full_checksums)
full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files);
@ -430,7 +439,7 @@ void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums
checkEqualImpl(rhs_minimalistic, check_uncompressed_hash_in_compressed_files);
}
void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files)
void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
{
if (num_compressed_files != rhs.num_compressed_files || num_uncompressed_files != rhs.num_uncompressed_files)
{

View File

@ -4,7 +4,7 @@
#include <IO/WriteBuffer.h>
#include <city.h>
#include <map>
#include <memory>
#include <optional>
class SipHash;
@ -112,7 +112,7 @@ struct MinimalisticDataPartChecksums
}
/// Is set only for old formats
std::unique_ptr<MergeTreeDataPartChecksums> full_checksums;
std::optional<MergeTreeDataPartChecksums> full_checksums;
static constexpr size_t MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS = 5;
@ -120,15 +120,17 @@ struct MinimalisticDataPartChecksums
void computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums);
bool deserialize(ReadBuffer & in);
void deserializeWithoutHeader(ReadBuffer & in);
static MinimalisticDataPartChecksums deserializeFrom(const String & s);
void serialize(WriteBuffer & to) const;
void serializeWithoutHeader(WriteBuffer & to) const;
String getSerializedString();
static String getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic);
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files);
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files);
void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files);
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
};

View File

@ -148,6 +148,13 @@ struct MergeTreeSettings
*/ \
M(SettingBool, use_minimalistic_checksums_in_zookeeper, true) \
\
/** Store part header (checksums and columns) in a compact format and a single part znode \
* instead of separate znodes (<part>/columns and <part>/checksums). \
* This can dramatically reduce snapshot size in ZooKeeper. \
* Before enabling, check that all replicas support the new format. \
*/ \
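/** A hedged, illustrative way to enable it per table (assuming the usual MergeTree SETTINGS clause; \
  * the ZooKeeper path and replica name below are placeholders):                                     \
  *   CREATE TABLE t (...) ENGINE = ReplicatedMergeTree('/clickhouse/tables/t', 'replica1')          \
  *       ORDER BY ... SETTINGS use_minimalistic_part_header_in_zookeeper = 1                        \
  */ \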
M(SettingBool, use_minimalistic_part_header_in_zookeeper, false) \
\
/** How many records about mutations that are done to keep. \
* If zero, then keep all of them */ \
M(SettingUInt64, finished_mutations_to_keep, 100) \

View File

@ -1,5 +1,6 @@
#include <Storages/MergeTree/ReplicatedMergeTreeAlterThread.h>
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Common/setThreadName.h>
@ -155,32 +156,9 @@ void ReplicatedMergeTreeAlterThread::run()
if (!transaction)
continue;
storage.updatePartHeaderInZooKeeperAndCommit(zookeeper, *transaction);
++changed_parts;
/// Update part metadata in ZooKeeper.
Coordination::Requests ops;
ops.emplace_back(zkutil::makeSetRequest(
storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1));
ops.emplace_back(zkutil::makeSetRequest(
storage.replica_path + "/parts/" + part->name + "/checksums",
storage.getChecksumsForZooKeeper(transaction->getNewChecksums()),
-1));
try
{
zookeeper->multi(ops);
}
catch (const Coordination::Exception & e)
{
/// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally.
if (e.code == Coordination::ZNONODE)
storage.enqueuePartForCheck(part->name);
throw;
}
/// Apply file changes.
transaction->commit();
}
/// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN

View File

@ -1,5 +1,6 @@
#include <Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Common/setThreadName.h>
@ -204,22 +205,34 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
auto zookeeper = storage.getZooKeeper();
auto table_lock = storage.lockStructure(false);
auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
part->columns, part->checksums);
String part_path = storage.replica_path + "/parts/" + part_name;
String part_znode;
/// If the part is in ZooKeeper, check its data against its checksums, and those against ZooKeeper.
if (zookeeper->exists(storage.replica_path + "/parts/" + part_name))
if (zookeeper->tryGet(part_path, part_znode))
{
LOG_WARNING(log, "Checking data of part " << part_name << ".");
try
{
auto zk_checksums = MinimalisticDataPartChecksums::deserializeFrom(
zookeeper->get(storage.replica_path + "/parts/" + part_name + "/checksums"));
zk_checksums.checkEqual(part->checksums, true);
ReplicatedMergeTreePartHeader zk_part_header;
if (!part_znode.empty())
zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode);
else
{
String columns_znode = zookeeper->get(part_path + "/columns");
String checksums_znode = zookeeper->get(part_path + "/checksums");
zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
columns_znode, checksums_znode);
}
auto zk_columns = NamesAndTypesList::parse(
zookeeper->get(storage.replica_path + "/parts/" + part_name + "/columns"));
if (part->columns != zk_columns)
if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash())
throw Exception("Columns of local part " + part_name + " are different from ZooKeeper", ErrorCodes::TABLE_DIFFERS_TOO_MUCH);
zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
checkDataPart(
storage.data.getFullPath() + part_name,
storage.data.index_granularity,

View File

@ -0,0 +1,66 @@
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
#include <Core/NamesAndTypes.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Common/SipHash.h>
#include <Common/StringUtils/StringUtils.h>
namespace DB
{
static std::array<char, 16> getSipHash(const String & str)
{
SipHash hash;
hash.update(str.data(), str.size());
std::array<char, 16> result;
hash.get128(result.data());
return result;
}
ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
const String & columns_znode, const String & checksums_znode)
{
auto columns_hash = getSipHash(columns_znode);
auto checksums = MinimalisticDataPartChecksums::deserializeFrom(checksums_znode);
return ReplicatedMergeTreePartHeader(std::move(columns_hash), std::move(checksums));
}
ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
const NamesAndTypesList & columns,
const MergeTreeDataPartChecksums & full_checksums)
{
MinimalisticDataPartChecksums checksums;
checksums.computeTotalChecksums(full_checksums);
return ReplicatedMergeTreePartHeader(getSipHash(columns.toString()), std::move(checksums));
}
void ReplicatedMergeTreePartHeader::read(ReadBuffer & in)
{
in >> "part header format version: 1\n";
in.readStrict(columns_hash.data(), columns_hash.size());
checksums.deserializeWithoutHeader(in);
}
ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromString(const String & str)
{
ReadBufferFromString in(str);
ReplicatedMergeTreePartHeader result;
result.read(in);
return result;
}
void ReplicatedMergeTreePartHeader::write(WriteBuffer & out) const
{
writeString("part header format version: 1\n", out);
out.write(columns_hash.data(), columns_hash.size());
checksums.serializeWithoutHeader(out);
}
String ReplicatedMergeTreePartHeader::toString() const
{
WriteBufferFromOwnString out;
write(out);
return out.str();
}
}

View File

@ -0,0 +1,50 @@
#pragma once
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
#include <Core/Types.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/Operators.h>
#include <array>
namespace DB
{
class NamesAndTypesList;
/// This class provides a compact representation of part metadata (available columns and checksums)
/// that is intended to be stored in the part znode in ZooKeeper.
/// It can also be initialized from the legacy format (from the contents of separate <part>/columns
/// and <part>/checksums znodes).
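/// A minimal usage sketch (hedged; a `part` with `columns` and `checksums` members is assumed, as elsewhere in this change):
///     auto header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(part->columns, part->checksums);
///     String znode_contents = header.toString();   // written to the single part znode
///     auto restored = ReplicatedMergeTreePartHeader::fromString(znode_contents);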
class ReplicatedMergeTreePartHeader
{
public:
ReplicatedMergeTreePartHeader() = default;
static ReplicatedMergeTreePartHeader fromColumnsAndChecksumsZNodes(
const String & columns_znode, const String & checksums_znode);
static ReplicatedMergeTreePartHeader fromColumnsAndChecksums(
const NamesAndTypesList & columns, const MergeTreeDataPartChecksums & full_checksums);
void read(ReadBuffer & in);
static ReplicatedMergeTreePartHeader fromString(const String & str);
void write(WriteBuffer & out) const;
String toString() const;
const std::array<char, 16> & getColumnsHash() const { return columns_hash; }
const MinimalisticDataPartChecksums & getChecksums() const { return checksums; }
private:
ReplicatedMergeTreePartHeader(std::array<char, 16> columns_hash_, MinimalisticDataPartChecksums checksums_)
: columns_hash(std::move(columns_hash_)), checksums(std::move(checksums_))
{
}
std::array<char, 16> columns_hash;
MinimalisticDataPartChecksums checksums;
};
}

View File

@ -30,7 +30,7 @@ namespace ErrorCodes
static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name)
{
auto db_and_table = getDatabaseAndTable(query, 0);
ASTPtr subquery = getTableFunctionOrSubquery(query, 0);
ASTPtr subquery = extractTableExpression(query, 0);
if (!db_and_table && !subquery)
return;
@ -69,7 +69,7 @@ static void checkAllowedQueries(const ASTSelectQuery & query)
if (query.prewhere_expression || query.final() || query.sample_size())
throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
ASTPtr subquery = getTableFunctionOrSubquery(query, 0);
ASTPtr subquery = extractTableExpression(query, 0);
if (!subquery)
return;

View File

@ -8,13 +8,14 @@
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeDataPart.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
#include <Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/ReplicatedMergeTreeAddress.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumAddedParts.h>
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
#include <Databases/IDatabase.h>
@ -566,11 +567,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
}
}
else
{
LOG_ERROR(log, "Fetching missing part " << missing_name);
parts_to_fetch.push_back(missing_name);
}
}
for (const String & name : parts_to_fetch)
expected_parts.erase(name);
@ -671,25 +669,49 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
removePartsFromZooKeeper(zookeeper, Strings(expected_parts.begin(), expected_parts.end()));
}
/// Add to the queue job to pick up the missing parts from other replicas and remove from ZK the information that we have them.
for (const String & name : parts_to_fetch)
/// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them.
std::vector<std::future<Coordination::ExistsResponse>> exists_futures;
exists_futures.reserve(parts_to_fetch.size());
for (const String & part_name : parts_to_fetch)
{
LOG_ERROR(log, "Removing missing part from ZooKeeper and queueing a fetch: " << name);
String part_path = replica_path + "/parts/" + part_name;
exists_futures.emplace_back(zookeeper->asyncExists(part_path));
}
std::vector<std::future<Coordination::MultiResponse>> enqueue_futures;
enqueue_futures.reserve(parts_to_fetch.size());
for (size_t i = 0; i < parts_to_fetch.size(); ++i)
{
const String & part_name = parts_to_fetch[i];
LOG_ERROR(log, "Removing locally missing part from ZooKeeper and queueing a fetch: " << part_name);
Coordination::Requests ops;
time_t part_create_time = 0;
Coordination::ExistsResponse exists_resp = exists_futures[i].get();
if (!exists_resp.error)
{
part_create_time = exists_resp.stat.ctime / 1000;
removePartFromZooKeeper(part_name, ops, exists_resp.stat.numChildren > 0);
}
LogEntry log_entry;
log_entry.type = LogEntry::GET_PART;
log_entry.source_replica = "";
log_entry.new_part_name = name;
log_entry.create_time = tryGetPartCreateTime(zookeeper, replica_path, name);
log_entry.new_part_name = part_name;
log_entry.create_time = part_create_time;
/// We assume that this occurs before the queue is loaded (queue.initialize).
Coordination::Requests ops;
removePartFromZooKeeper(name, ops);
ops.emplace_back(zkutil::makeCreateRequest(
replica_path + "/queue/queue-", log_entry.toString(), zkutil::CreateMode::PersistentSequential));
zookeeper->multi(ops);
enqueue_futures.emplace_back(zookeeper->asyncMulti(ops));
}
for (auto & future : enqueue_futures)
future.get();
/// Remove extra local parts.
for (const MergeTreeData::DataPartPtr & part : unexpected_parts)
{
@ -708,18 +730,19 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
check(part->columns);
int expected_columns_version = columns_version;
auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
part->columns, part->checksums);
Strings replicas = zookeeper->getChildren(zookeeper_path + "/replicas");
std::shuffle(replicas.begin(), replicas.end(), rng);
String expected_columns_str = part->columns.toString();
bool has_been_alredy_added = false;
bool has_been_already_added = false;
for (const String & replica : replicas)
{
Coordination::Stat stat_before, stat_after;
String current_part_path = zookeeper_path + "/replicas/" + replica + "/parts/" + part_name;
String columns_str;
if (!zookeeper->tryGet(current_part_path + "/columns", columns_str, &stat_before))
String part_zk_str;
if (!zookeeper->tryGet(current_part_path, part_zk_str))
{
if (absent_replicas_paths)
absent_replicas_paths->emplace(current_part_path);
@ -727,30 +750,41 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
continue;
}
if (columns_str != expected_columns_str)
ReplicatedMergeTreePartHeader replica_part_header;
if (!part_zk_str.empty())
replica_part_header = ReplicatedMergeTreePartHeader::fromString(part_zk_str);
else
{
LOG_INFO(log, "Not checking checksums of part " << part_name << " with replica " << replica
<< " because columns are different");
continue;
}
Coordination::Stat columns_stat_before, columns_stat_after;
String columns_str;
String checksums_str;
/// Check that the version of the columns node did not change while we were reading the checksums.
/// This ensures that the columns and the checksum refer to the same data.
if (!zookeeper->tryGet(current_part_path + "/checksums", checksums_str) ||
!zookeeper->exists(current_part_path + "/columns", &stat_after) ||
stat_before.version != stat_after.version)
if (!zookeeper->tryGet(current_part_path + "/columns", columns_str, &columns_stat_before) ||
!zookeeper->tryGet(current_part_path + "/checksums", checksums_str) ||
!zookeeper->exists(current_part_path + "/columns", &columns_stat_after) ||
columns_stat_before.version != columns_stat_after.version)
{
LOG_INFO(log, "Not checking checksums of part " << part_name << " with replica " << replica
<< " because part changed while we were reading its checksums");
continue;
}
auto zk_checksums = MinimalisticDataPartChecksums::deserializeFrom(checksums_str);
zk_checksums.checkEqual(part->checksums, true);
replica_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
columns_str, checksums_str);
}
if (replica_part_header.getColumnsHash() != local_part_header.getColumnsHash())
{
LOG_INFO(log, "Not checking checksums of part " << part_name << " with replica " << replica
<< " because columns are different");
continue;
}
replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
if (replica == replica_name)
has_been_alredy_added = true;
has_been_already_added = true;
/// If we verify checksums in a "sequential manner" (i.e. recheck the absence of checksums on other replicas when committing)
/// then it is enough to verify checksums on at least one replica since checksums on other replicas must be the same.
@ -761,12 +795,20 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
}
}
if (!has_been_alredy_added)
if (!has_been_already_added)
{
String part_path = replica_path + "/parts/" + part_name;
ops.emplace_back(zkutil::makeCheckRequest(
zookeeper_path + "/columns", expected_columns_version));
if (data.settings.use_minimalistic_part_header_in_zookeeper)
{
ops.emplace_back(zkutil::makeCreateRequest(
part_path, local_part_header.toString(), zkutil::CreateMode::Persistent));
}
else
{
ops.emplace_back(zkutil::makeCreateRequest(
part_path, "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(
@ -774,6 +816,7 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
ops.emplace_back(zkutil::makeCreateRequest(
part_path + "/checksums", getChecksumsForZooKeeper(part->checksums), zkutil::CreateMode::Persistent));
}
}
else
{
LOG_WARNING(log, "checkPartAndAddToZooKeeper: node " << replica_path + "/parts/" + part_name << " already exists."
@ -1510,16 +1553,8 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
if (!transaction)
continue;
/// Update part metadata in ZooKeeper.
Coordination::Requests ops;
ops.emplace_back(zkutil::makeSetRequest(
replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1));
ops.emplace_back(zkutil::makeSetRequest(
replica_path + "/parts/" + part->name + "/checksums", getChecksumsForZooKeeper(transaction->getNewChecksums()), -1));
updatePartHeaderInZooKeeperAndCommit(zookeeper, *transaction);
zookeeper->multi(ops);
transaction->commit();
++modified_parts;
}
@ -2322,12 +2357,15 @@ bool StorageReplicatedMergeTree::createLogEntryToMutatePart(const MergeTreeDataP
}
void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops)
void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children)
{
String part_path = replica_path + "/parts/" + part_name;
if (has_children)
{
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/checksums", -1));
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/columns", -1));
}
ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
}
@ -2338,19 +2376,26 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
String part_path = replica_path + "/parts/" + part_name;
Coordination::Requests ops;
time_t part_create_time = 0;
Coordination::Stat stat;
if (zookeeper->exists(part_path, &stat))
{
part_create_time = stat.ctime / 1000;
removePartFromZooKeeper(part_name, ops, stat.numChildren > 0);
}
LogEntryPtr log_entry = std::make_shared<LogEntry>();
log_entry->type = LogEntry::GET_PART;
log_entry->create_time = tryGetPartCreateTime(zookeeper, replica_path, part_name);
log_entry->create_time = part_create_time;
log_entry->source_replica = "";
log_entry->new_part_name = part_name;
Coordination::Requests ops;
ops.emplace_back(zkutil::makeCreateRequest(
replica_path + "/queue/queue-", log_entry->toString(),
zkutil::CreateMode::PersistentSequential));
removePartFromZooKeeper(part_name, ops);
auto results = zookeeper->multi(ops);
String path_created = dynamic_cast<const Coordination::CreateResponse &>(*results[0]).path_created;
@ -2691,8 +2736,18 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin
MinimalisticDataPartChecksums source_part_checksums;
source_part_checksums.computeTotalChecksums(source_part->checksums);
String desired_checksums_str = getZooKeeper()->get(source_replica_path + "/parts/" + part_name + "/checksums");
auto desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str);
MinimalisticDataPartChecksums desired_checksums;
auto zookeeper = getZooKeeper();
String part_path = source_replica_path + "/parts/" + part_name;
String part_znode = zookeeper->get(part_path);
if (!part_znode.empty())
desired_checksums = ReplicatedMergeTreePartHeader::fromString(part_znode).getChecksums();
else
{
String desired_checksums_str = zookeeper->get(part_path + "/checksums");
desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str);
}
if (source_part_checksums == desired_checksums)
{
LOG_TRACE(log, "Found local part " << source_part->name << " with the same checksums as " << part_name);
@ -4450,32 +4505,40 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(MergeTre
bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries)
{
using MultiFuture = std::future<Coordination::MultiResponse>;
size_t num_tries = 0;
bool sucess = false;
bool success = false;
while (!sucess && (max_retries == 0 || num_tries < max_retries))
while (!success && (max_retries == 0 || num_tries < max_retries))
{
std::vector<MultiFuture> futures;
futures.reserve(part_names.size());
++num_tries;
sucess = true;
try
{
++num_tries;
success = true;
auto zookeeper = getZooKeeper();
std::vector<std::future<Coordination::ExistsResponse>> exists_futures;
exists_futures.reserve(part_names.size());
for (const String & part_name : part_names)
{
Coordination::Requests ops;
removePartFromZooKeeper(part_name, ops);
futures.emplace_back(zookeeper->tryAsyncMulti(ops));
String part_path = replica_path + "/parts/" + part_name;
exists_futures.emplace_back(zookeeper->asyncExists(part_path));
}
for (auto & future : futures)
std::vector<std::future<Coordination::MultiResponse>> remove_futures;
remove_futures.reserve(part_names.size());
for (size_t i = 0; i < part_names.size(); ++i)
{
Coordination::ExistsResponse exists_resp = exists_futures[i].get();
if (!exists_resp.error)
{
Coordination::Requests ops;
removePartFromZooKeeper(part_names[i], ops, exists_resp.stat.numChildren > 0);
remove_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
}
}
for (auto & future : remove_futures)
{
auto response = future.get();
@ -4484,7 +4547,7 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St
if (Coordination::isHardwareError(response.error))
{
sucess = false;
success = false;
continue;
}
@ -4493,7 +4556,7 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St
}
catch (Coordination::Exception & e)
{
sucess = false;
success = false;
if (Coordination::isHardwareError(e.code))
tryLogCurrentException(log, __PRETTY_FUNCTION__);
@ -4501,69 +4564,78 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St
throw;
}
if (!sucess && num_tries < max_retries)
if (!success && num_tries < max_retries)
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
}
return sucess;
return success;
}
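The renamed success flag aside, the control flow of tryRemovePartsFromZooKeeperWithRetries is unchanged: retry the whole batch until it succeeds or the retry budget is exhausted, sleeping a second between attempts, with only hardware (session-level) errors counted as retryable. A compact, generic sketch of that loop, not the real function:

#include <chrono>
#include <functional>
#include <iostream>
#include <thread>

/// Generic retry shape; the real function marks the attempt as failed only on
/// hardware errors and rethrows every other exception.
bool retryBatch(const std::function<bool()> & attempt, size_t max_retries)
{
    size_t num_tries = 0;
    bool success = false;
    while (!success && (max_retries == 0 || num_tries < max_retries))
    {
        ++num_tries;
        success = attempt();
        if (!success && num_tries < max_retries)
            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    }
    return success;
}

int main()
{
    size_t calls = 0;
    bool ok = retryBatch([&] { return ++calls == 3; }, 5);
    std::cout << (ok ? "succeeded" : "gave up") << " after " << calls << " attempt(s)\n";
}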
/// TODO: rewrite this code using async Multi ops after final ZooKeeper library update
void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
NameSet * parts_should_be_retried)
void StorageReplicatedMergeTree::removePartsFromZooKeeper(
zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, NameSet * parts_should_be_retried)
{
std::vector<std::future<Coordination::ExistsResponse>> exists_futures;
exists_futures.reserve(part_names.size());
for (const String & part_name : part_names)
{
String part_path = replica_path + "/parts/" + part_name;
exists_futures.emplace_back(zookeeper->asyncExists(part_path));
}
std::vector<std::future<Coordination::MultiResponse>> remove_futures;
remove_futures.reserve(part_names.size());
try
{
for (size_t i = 0; i < part_names.size(); ++i)
{
Coordination::ExistsResponse exists_resp = exists_futures[i].get();
if (!exists_resp.error)
{
Coordination::Requests ops;
auto it_first_node_in_batch = part_names.cbegin();
for (auto it = part_names.cbegin(); it != part_names.cend(); ++it)
{
removePartFromZooKeeper(*it, ops);
auto it_next = std::next(it);
if (ops.size() >= zkutil::MULTI_BATCH_SIZE || it_next == part_names.cend())
{
Coordination::Responses unused_responses;
auto code = zookeeper->tryMultiNoThrow(ops, unused_responses);
ops.clear();
if (code == Coordination::ZNONODE)
{
/// Fallback
LOG_DEBUG(log, "ZooKeeper nodes for some parts in the batch are missing, will remove part nodes one by one");
for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch)
{
Coordination::Requests cur_ops;
removePartFromZooKeeper(*it_in_batch, cur_ops);
auto cur_code = zookeeper->tryMultiNoThrow(cur_ops, unused_responses);
if (cur_code == Coordination::ZNONODE)
{
LOG_DEBUG(log, "There is no part " << *it_in_batch << " in ZooKeeper, it was only in filesystem");
removePartFromZooKeeper(part_names[i], ops, exists_resp.stat.numChildren > 0);
remove_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
}
else if (parts_should_be_retried && Coordination::isHardwareError(cur_code))
else
{
parts_should_be_retried->emplace(*it_in_batch);
}
else if (cur_code)
{
LOG_WARNING(log, "Cannot remove part " << *it_in_batch << " from ZooKeeper: " << zkutil::ZooKeeper::error2string(cur_code));
LOG_DEBUG(log,
"There is no part " << part_names[i] << " in ZooKeeper, it was only in filesystem");
// Emplace an invalid future so that the total number of futures matches part_names.size()
remove_futures.emplace_back();
}
}
}
else if (parts_should_be_retried && Coordination::isHardwareError(code))
catch (const Coordination::Exception & e)
{
for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch)
parts_should_be_retried->emplace(*it_in_batch);
}
else if (code)
{
LOG_WARNING(log, "There was a problem with deleting " << (it_next - it_first_node_in_batch)
<< " nodes from ZooKeeper: " << ::zkutil::ZooKeeper::error2string(code));
if (parts_should_be_retried && Coordination::isHardwareError(e.code))
parts_should_be_retried->insert(part_names.begin(), part_names.end());
throw;
}
it_first_node_in_batch = it_next;
for (size_t i = 0; i < remove_futures.size(); ++i)
{
auto & future = remove_futures[i];
if (!future.valid())
continue;
auto response = future.get();
if (response.error == Coordination::ZOK)
continue;
else if (response.error == Coordination::ZNONODE)
{
LOG_DEBUG(log,
"There is no part " << part_names[i] << " in ZooKeeper, it was only in filesystem");
continue;
}
else if (Coordination::isHardwareError(response.error))
{
if (parts_should_be_retried)
parts_should_be_retried->insert(part_names[i]);
continue;
}
else
LOG_WARNING(log, "Cannot remove part " << part_names[i] << " from ZooKeeper: "
<< zkutil::ZooKeeper::error2string(response.error));
}
}
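The response loop above treats each part independently: success and "node already gone" are fine, hardware errors put the part on the retry set, and anything else is only logged. A small sketch of that triage, with hypothetical error codes in place of the Coordination:: constants:

#include <iostream>
#include <string>
#include <vector>

/// Hypothetical error codes standing in for the Coordination:: values.
enum class Code { Ok, NoNode, ConnectionLoss, Other };

void triage(const std::string & part, Code code, std::vector<std::string> & retry_later)
{
    if (code == Code::Ok)
        return;                          /// removed successfully
    if (code == Code::NoNode)
        std::cout << part << ": not in ZooKeeper, nothing to remove\n";
    else if (code == Code::ConnectionLoss)
        retry_later.push_back(part);     /// worth retrying after the session is restored
    else
        std::cout << part << ": cannot remove, giving up\n";
}

int main()
{
    std::vector<std::string> retry_later;
    triage("all_0_0_0", Code::NoNode, retry_later);
    triage("all_1_1_0", Code::ConnectionLoss, retry_later);
    std::cout << retry_later.size() << " part(s) queued for retry\n";
}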
@ -4809,6 +4881,16 @@ void StorageReplicatedMergeTree::getCommitPartOps(
ops.emplace_back(zkutil::makeCheckRequest(
zookeeper_path + "/columns",
columns_version));
if (data.settings.use_minimalistic_part_header_in_zookeeper)
{
ops.emplace_back(zkutil::makeCreateRequest(
replica_path + "/parts/" + part->name,
ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(part->columns, part->checksums).toString(),
zkutil::CreateMode::Persistent));
}
else
{
ops.emplace_back(zkutil::makeCreateRequest(
replica_path + "/parts/" + part->name,
"",
@ -4821,6 +4903,59 @@ void StorageReplicatedMergeTree::getCommitPartOps(
replica_path + "/parts/" + part->name + "/checksums",
getChecksumsForZooKeeper(part->checksums),
zkutil::CreateMode::Persistent));
}
}
void StorageReplicatedMergeTree::updatePartHeaderInZooKeeperAndCommit(
const zkutil::ZooKeeperPtr & zookeeper,
MergeTreeData::AlterDataPartTransaction & transaction)
{
String part_path = replica_path + "/parts/" + transaction.getPartName();
bool need_delete_columns_and_checksums_nodes = false;
try
{
if (data.settings.use_minimalistic_part_header_in_zookeeper)
{
auto part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
transaction.getNewColumns(), transaction.getNewChecksums());
Coordination::Stat stat;
zookeeper->set(part_path, part_header.toString(), -1, &stat);
need_delete_columns_and_checksums_nodes = stat.numChildren > 0;
}
else
{
Coordination::Requests ops;
ops.emplace_back(zkutil::makeSetRequest(
part_path, String(), -1));
ops.emplace_back(zkutil::makeSetRequest(
part_path + "/columns", transaction.getNewColumns().toString(), -1));
ops.emplace_back(zkutil::makeSetRequest(
part_path + "/checksums", getChecksumsForZooKeeper(transaction.getNewChecksums()), -1));
zookeeper->multi(ops);
}
}
catch (const Coordination::Exception & e)
{
/// The part does not exist in ZK. We will add it to the queue for verification - maybe the part is superfluous and must be removed locally.
if (e.code == Coordination::ZNONODE)
enqueuePartForCheck(transaction.getPartName());
throw;
}
/// Apply file changes.
transaction.commit();
/// Legacy <part_path>/columns and <part_path>/checksums znodes are not needed anymore and can be deleted.
if (need_delete_columns_and_checksums_nodes)
{
Coordination::Requests ops;
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/columns", -1));
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/checksums", -1));
zookeeper->multi(ops);
}
}
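The new helper is deliberate about ordering: the part znode is rewritten in ZooKeeper first, the local file changes are committed only after ZooKeeper accepted the update, and the legacy child znodes are removed last. A toy sketch of that three-step sequence, with made-up stand-in types rather than the real session and transaction classes:

#include <iostream>

/// Made-up stand-ins for the ZooKeeper session and the local ALTER transaction.
struct FakeZooKeeper
{
    void setPartHeader() { std::cout << "1. part znode updated in ZooKeeper\n"; }
    void removeLegacyChildren() { std::cout << "3. legacy /columns and /checksums removed\n"; }
};

struct FakeTransaction
{
    void commit() { std::cout << "2. local file changes committed\n"; }
};

void updateHeaderAndCommit(FakeZooKeeper & zk, FakeTransaction & txn, bool need_cleanup)
{
    zk.setPartHeader();            /// if this throws, nothing has been changed locally yet
    txn.commit();                  /// only now touch the local files
    if (need_cleanup)
        zk.removeLegacyChildren(); /// safe to drop the old znodes after the commit
}

int main()
{
    FakeZooKeeper zk;
    FakeTransaction txn;
    updateHeaderAndCommit(zk, txn, true);
}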
ReplicatedMergeTreeAddress StorageReplicatedMergeTree::getReplicatedMergeTreeAddress() const

View File

@ -372,8 +372,14 @@ private:
MergeTreeData::MutableDataPartPtr & part,
const String & block_id_path = "") const;
/// Updates info about part columns and checksums in ZooKeeper and commits the transaction if successful.
void updatePartHeaderInZooKeeperAndCommit(
const zkutil::ZooKeeperPtr & zookeeper,
MergeTreeData::AlterDataPartTransaction & transaction);
/// Adds actions to `ops` that remove a part from ZooKeeper.
void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops);
/// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children);
/// Quickly removes a big set of parts from ZooKeeper (using async multi queries)
void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,

View File

@ -24,3 +24,6 @@ target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse
add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp)
target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper)
add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp)
target_link_libraries (transform_part_zk_nodes PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper)

View File

@ -0,0 +1,130 @@
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <boost/program_options.hpp>
#include <list>
#include <iostream>
int main(int argc, char ** argv)
try
{
boost::program_options::options_description desc("Allowed options");
desc.add_options()
("help,h", "produce help message")
("address,a", boost::program_options::value<std::string>()->required(),
"addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181")
("path,p", boost::program_options::value<std::string>()->required(),
"where to start")
;
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
if (options.count("help"))
{
std::cout << "Transform contents of part nodes in ZooKeeper to more compact storage scheme." << std::endl;
std::cout << "Usage: " << argv[0] << " [options]" << std::endl;
std::cout << desc << std::endl;
return 1;
}
zkutil::ZooKeeper zookeeper(options.at("address").as<std::string>());
std::string initial_path = options.at("path").as<std::string>();
struct Node
{
Node(
std::string path_,
std::future<Coordination::GetResponse> get_future_,
std::future<Coordination::ListResponse> children_future_,
Node * parent_)
: path(std::move(path_))
, get_future(std::move(get_future_))
, children_future(std::move(children_future_))
, parent(parent_)
{
}
std::string path;
std::future<Coordination::GetResponse> get_future;
std::future<Coordination::ListResponse> children_future;
Node * parent = nullptr;
std::future<Coordination::MultiResponse> set_future;
};
std::list<Node> nodes_queue;
nodes_queue.emplace_back(
initial_path, zookeeper.asyncGet(initial_path), zookeeper.asyncGetChildren(initial_path), nullptr);
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
{
Coordination::GetResponse get_response;
Coordination::ListResponse children_response;
try
{
get_response = it->get_future.get();
children_response = it->children_future.get();
}
catch (const Coordination::Exception & e)
{
if (e.code == Coordination::ZNONODE)
continue;
throw;
}
if (get_response.stat.ephemeralOwner)
continue;
if (it->path.find("/parts/") != std::string::npos
&& !endsWith(it->path, "/columns")
&& !endsWith(it->path, "/checksums"))
{
if (!children_response.names.empty())
{
auto part_header = DB::ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
zookeeper.get(it->path + "/columns"), zookeeper.get(it->path + "/checksums"));
Coordination::Requests ops;
ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/columns", -1));
ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/checksums", -1));
ops.emplace_back(zkutil::makeSetRequest(it->path, part_header.toString(), -1));
it->set_future = zookeeper.asyncMulti(ops);
}
}
else
{
for (const auto & name : children_response.names)
{
std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name;
nodes_queue.emplace_back(
child_path, zookeeper.asyncGet(child_path), zookeeper.asyncGetChildren(child_path),
&(*it));
}
}
}
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
{
if (it->set_future.valid())
{
it->set_future.get();
std::cerr << it->path << " changed!" << std::endl;
}
}
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
throw;
}
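Both this tool and zookeeper-copy-tree further down walk the tree with the same trick: a std::list used as a work queue that is appended to while being iterated, which is safe because std::list iterators stay valid across emplace_back. A standalone sketch of that traversal shape over a toy in-memory tree:

#include <iostream>
#include <list>
#include <map>
#include <string>
#include <vector>

int main()
{
    /// Toy namespace keyed by path, standing in for the ZooKeeper tree.
    std::map<std::string, std::vector<std::string>> children{
        {"/", {"clickhouse"}},
        {"/clickhouse", {"tables"}},
        {"/clickhouse/tables", {}}};

    /// Work queue that grows during iteration; existing iterators remain valid.
    std::list<std::string> queue{"/"};
    for (auto it = queue.begin(); it != queue.end(); ++it)
    {
        std::cout << "visit " << *it << '\n';
        for (const auto & name : children[*it])
            queue.emplace_back(*it == "/" ? *it + name : *it + '/' + name);
    }
}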

View File

@ -12,6 +12,7 @@
#include <Common/typeid_cast.h>
#include <Common/parseRemoteDescription.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Core/Defines.h>
namespace DB
@ -152,7 +153,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
if (names.empty())
throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS);
cluster = std::make_shared<Cluster>(context.getSettings(), names, username, password, context.getTCPPort(), false);
auto maybe_secure_port = context.getTCPPortSecure();
cluster = std::make_shared<Cluster>(context.getSettings(), names, username, password, (secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()), false, secure);
}
auto structure_remote_table = getStructureOfRemoteTable(*cluster, remote_database, remote_table, context, remote_table_function_ptr);
@ -177,8 +179,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
}
TableFunctionRemote::TableFunctionRemote(const std::string & name_)
: name(name_)
TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure)
: name{name_}, secure{secure}
{
is_cluster_function = name == "cluster";
@ -193,6 +195,7 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_)
void registerTableFunctionRemote(TableFunctionFactory & factory)
{
factory.registerFunction("remote", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("remote"); });
factory.registerFunction("remoteSecure", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("remote", /* secure = */ true); });
factory.registerFunction("cluster", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("cluster"); });
}
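remoteSecure is not a separate class: the factory simply registers a second name that constructs the same TableFunctionRemote with secure = true, and the connection then uses tcp_port_secure (or DBMS_DEFAULT_SECURE_PORT, 9440) instead of the plain TCP port. A tiny sketch of that one-class, two-names registration pattern, with invented types rather than the real TableFunctionFactory:

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

/// Invented miniature factory; only the registration pattern mirrors the code above.
struct Remote
{
    explicit Remote(bool secure_) : secure(secure_) {}
    bool secure;
};

int main()
{
    std::map<std::string, std::function<std::shared_ptr<Remote>()>> factory;
    factory["remote"] = [] { return std::make_shared<Remote>(false); };
    factory["remoteSecure"] = [] { return std::make_shared<Remote>(/* secure = */ true); };

    std::cout << "remoteSecure is secure: " << factory["remoteSecure"]()->secure << '\n';
}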

View File

@ -16,7 +16,7 @@ namespace DB
class TableFunctionRemote : public ITableFunction
{
public:
explicit TableFunctionRemote(const std::string & name_ = "remote");
explicit TableFunctionRemote(const std::string & name_ = "remote", bool secure = false);
std::string getName() const override { return name; }
@ -26,6 +26,7 @@ private:
std::string name;
bool is_cluster_function;
std::string help_message;
bool secure;
};
}

View File

@ -0,0 +1,46 @@
<test>
<name>Constant column string search</name>
<tags>
<tag>search</tag>
</tags>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>5</iterations>
<min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
</all_of>
<any_of>
<iterations>50</iterations>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
<query><![CDATA[select count(position(URL, 'yandex')), count(position(URL, 'google')) FROM hits_100m_single]]></query>
<query><![CDATA[select count(multiPosition(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
<query><![CDATA[select count(match(URL, 'yandex|google')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'yahoo')), sum(match(URL, 'pikabu')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'yahoo', 'pikabu'])) from hits_100m_single]]></query>
<query><![CDATA[select sum(match(URL, 'yandex|google|yahoo|pikabu')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'http')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'http'])) from hits_100m_single]]></query>
<query><![CDATA[select sum(match(URL, 'yandex|google|http')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'facebook')), sum(match(URL, 'wikipedia')), sum(match(URL, 'reddit')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'facebook', 'wikipedia', 'reddit'])) from hits_100m_single]]></query>
<query><![CDATA[select sum(match(URL, 'yandex|google|facebook|wikipedia|reddit')) FROM hits_100m_single]]></query>
<query><![CDATA[select sum(firstMatch(URL, ['yandex', 'google', 'http', 'facebook', 'google'])) from hits_100m_single]]></query>
<main_metric>
<min_time/>
</main_metric>
</test>

View File

@ -0,0 +1,10 @@
0
0
0
0
0
0
0
0
0
0

View File

@ -0,0 +1,24 @@
#!/usr/bin/env bash
# set -x
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
# A non-default server config is needed
tcp_port_secure=`$CLICKHOUSE_EXTRACT_CONFIG -k tcp_port_secure 2>/dev/null`
if [ -z $tcp_port_secure ]; then
# Secure port disabled. Fake result
cat $CURDIR/00505_shard_secure.reference
else
$CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure('127.0.0.{1,2}', system.one);"
$CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure('127.0.0.{1,2}:$CLICKHOUSE_PORT_TCP_SECURE', system.one);"
$CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure('127.0.0.{1,2}', system.one);"
$CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure(test_shard_localhost_secure, system.one);"
$CLICKHOUSE_CLIENT -q "SELECT * FROM remote(test_shard_localhost_secure, system.one);"
$CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure(test_shard_localhost, system.one);"
$CLICKHOUSE_CLIENT -q "SELECT * FROM remote(test_shard_localhost, system.one);"
fi

View File

@ -0,0 +1,35 @@
*** Test fetches ***
*** replica 1 ***
1 1
2 2
*** replica 2 ***
1 1
2 2
*** Test merges ***
*** replica 1 ***
all_0_1_1 1
all_0_1_1 2
*** replica 2 ***
all_0_1_1 1
all_0_1_1 2
*** Test part removal ***
*** replica 1 ***
all_0_1_1
all_0_1_1
*** replica 2 ***
all_0_1_1
all_0_1_1
*** Test ALTER ***
*** replica 1 ***
1 1
2 1
*** replica 2 ***
1 1
2 1
*** Test CLEAR COLUMN ***
*** replica 1 ***
1 0
2 0
*** replica 2 ***
1 0
2 0

View File

@ -0,0 +1,61 @@
DROP TABLE IF EXISTS test.part_header_r1;
DROP TABLE IF EXISTS test.part_header_r2;
CREATE TABLE test.part_header_r1(x UInt32, y UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test/part_header', '1') ORDER BY x
SETTINGS use_minimalistic_part_header_in_zookeeper = 0,
old_parts_lifetime = 1,
cleanup_delay_period = 0,
cleanup_delay_period_random_add = 0;
CREATE TABLE test.part_header_r2(x UInt32, y UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test/part_header', '2') ORDER BY x
SETTINGS use_minimalistic_part_header_in_zookeeper = 1,
old_parts_lifetime = 1,
cleanup_delay_period = 0,
cleanup_delay_period_random_add = 0;
SELECT '*** Test fetches ***';
INSERT INTO test.part_header_r1 VALUES (1, 1);
INSERT INTO test.part_header_r2 VALUES (2, 2);
SYSTEM SYNC REPLICA test.part_header_r1;
SYSTEM SYNC REPLICA test.part_header_r2;
SELECT '*** replica 1 ***';
SELECT x, y FROM test.part_header_r1 ORDER BY x;
SELECT '*** replica 2 ***';
SELECT x, y FROM test.part_header_r2 ORDER BY x;
SELECT '*** Test merges ***';
OPTIMIZE TABLE test.part_header_r1;
SYSTEM SYNC REPLICA test.part_header_r2;
SELECT '*** replica 1 ***';
SELECT _part, x FROM test.part_header_r1 ORDER BY x;
SELECT '*** replica 2 ***';
SELECT _part, x FROM test.part_header_r2 ORDER BY x;
SELECT sleep(2) FORMAT Null;
SELECT '*** Test part removal ***';
SELECT '*** replica 1 ***';
SELECT name FROM system.parts WHERE database = 'test' AND table = 'part_header_r1';
SELECT name FROM system.zookeeper WHERE path = '/clickhouse/tables/test/part_header/replicas/1/parts';
SELECT '*** replica 2 ***';
SELECT name FROM system.parts WHERE database = 'test' AND table = 'part_header_r2';
SELECT name FROM system.zookeeper WHERE path = '/clickhouse/tables/test/part_header/replicas/1/parts';
SELECT '*** Test ALTER ***';
ALTER TABLE test.part_header_r1 MODIFY COLUMN y String;
SELECT '*** replica 1 ***';
SELECT x, length(y) FROM test.part_header_r1 ORDER BY x;
SELECT '*** replica 2 ***';
SELECT x, length(y) FROM test.part_header_r2 ORDER BY x;
SELECT '*** Test CLEAR COLUMN ***';
SET replication_alter_partitions_sync = 2;
ALTER TABLE test.part_header_r1 CLEAR COLUMN y IN PARTITION tuple();
SELECT '*** replica 1 ***';
SELECT x, length(y) FROM test.part_header_r1 ORDER BY x;
SELECT '*** replica 2 ***';
SELECT x, length(y) FROM test.part_header_r2 ORDER BY x;
DROP TABLE test.part_header_r1;
DROP TABLE test.part_header_r2;

View File

@ -0,0 +1 @@
1 2 5 3 4 7 6

View File

@ -0,0 +1,59 @@
#!/usr/bin/env bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
#create the schema file
echo "
@0x803231eaa402b968;
struct NestedNestedOne
{
nestednestednumber @0 : UInt64;
}
struct NestedNestedTwo
{
nestednestedtext @0 : Text;
}
struct NestedOne
{
nestednestedone @0 : NestedNestedOne;
nestednestedtwo @1 : NestedNestedTwo;
nestednumber @2: UInt64;
}
struct NestedTwo
{
nestednestedone @0 : NestedNestedOne;
nestednestedtwo @1 : NestedNestedTwo;
nestedtext @2 : Text;
}
struct CapnProto
{
number @0 : UInt64;
string @1 : Text;
nestedone @2 : NestedOne;
nestedtwo @3 : NestedTwo;
nestedthree @4 : NestedNestedTwo;
}" > test.capnp
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.capnproto_input"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test.capnproto_input
(
number UInt64,
string String,
nestedone_nestednumber UInt64,
nestedone_nestednestedone_nestednestednumber UInt64,
nestedone_nestednestedtwo_nestednestedtext String,
nestedtwo_nestednestedtwo_nestednestedtext String,
nestedtwo_nestednestedone_nestednestednumber UInt64,
nestedtwo_nestedtext String
) ENGINE = Memory"
echo -ne '\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x01\x00\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x12\x00\x00\x00\x0c\x00\x00\x00\x01\x00\x02\x00\x20\x00\x00\x00\x00\x00\x03\x00\x34\x00\x00\x00\x00\x00\x01\x00\x32\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x34\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x01\x00\x00\x00\x08\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x37\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x39\x00\x00\x00\x00\x00\x00\x00' | $CLICKHOUSE_CLIENT --stacktrace --format_schema='test:CapnProto' --query="INSERT INTO test.capnproto_input FORMAT CapnProto";
$CLICKHOUSE_CLIENT -q "SELECT * FROM test.capnproto_input"
$CLICKHOUSE_CLIENT -q "DROP TABLE test.capnproto_input"
# remove the schema file
rm test.capnp

View File

@ -0,0 +1,25 @@
-- this test cannot pass without the new DFA matching algorithm of sequenceMatch
DROP TABLE IF EXISTS test.sequence;
CREATE TABLE test.sequence
(
userID UInt64,
eventType Enum8('A' = 1, 'B' = 2, 'C' = 3),
EventTime UInt64
)
ENGINE = Memory;
INSERT INTO test.sequence SELECT 1, number = 0 ? 'A' : (number < 1000000 ? 'B' : 'C'), number FROM numbers(1000001);
SELECT userID
FROM test.sequence
GROUP BY userID
HAVING sequenceMatch('(?1).*(?2).*(?3)')(toDateTime(EventTime), eventType = 'A', eventType = 'B', eventType = 'C');
SELECT userID
FROM test.sequence
GROUP BY userID
HAVING sequenceMatch('(?1).*(?2).*(?3)')(toDateTime(EventTime), eventType = 'A', eventType = 'B', eventType = 'A');
DROP TABLE test.sequence;

debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (19.1.0) unstable; urgency=low
clickhouse (19.1.1) unstable; urgency=low
* Modified source code
-- <root@yandex-team.ru> Tue, 01 Jan 2019 07:16:20 +0300
-- <root@yandex-team.ru> Wed, 16 Jan 2019 14:04:37 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.1.0
ARG version=19.1.1
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.1.0
ARG version=19.1.1
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=19.1.0
ARG version=19.1.1
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -18,7 +18,8 @@ RUN apt-get update -y \
sudo \
openssl \
netcat-openbsd \
telnet
telnet \
moreutils
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
@ -29,8 +30,14 @@ COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml
COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
echo "TSAN_OPTIONS='halt_on_error=1'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
service zookeeper start; sleep 5; \
service clickhouse-server start && sleep 5 && clickhouse-test --shard --zookeeper $SKIP_TESTS_OPTION 2>&1 | tee test_output/test_result.txt
service clickhouse-server start && sleep 5 && clickhouse-test --shard --zookeeper $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

View File

@ -1,4 +1,4 @@
FROM ubuntu:18.10
FROM yandex/clickhouse-deb-builder
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
@ -24,7 +24,14 @@ COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml
COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
echo "TSAN_OPTIONS='halt_on_error=1'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
service clickhouse-server start && sleep 1 && ./stress --output-folder test_output

View File

@ -1,5 +1,5 @@
# remote
# remote, remoteSecure
Allows you to access remote servers without creating a `Distributed` table.
@ -72,5 +72,6 @@ The `remote` table function can be useful in the following cases:
If the user is not specified, `default` is used.
If the password is not specified, an empty password is used.
`remoteSecure` - same as `remote`, but over a secured connection. Default port - `tcp_port_secure` from the config, or 9440.
[Original article](https://clickhouse.yandex/docs/en/query_language/table_functions/remote/) <!--hide-->

View File

@ -50,7 +50,7 @@ sudo apt-get install clickhouse-client clickhouse-server
### From source code
To build ClickHouse manually, use the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build_osx.md).
You can build the packages and install them, or use the programs without installing packages. A manual build also lets you drop the SSE 4.2 instruction set requirement or build for AArch64 CPUs.
@ -97,7 +97,7 @@ $ clickhouse-client
By default it connects to localhost:9000 as the `default` user with no password. The client can also be used to connect to a remote server with the `--host` argument.
The terminal must use UTF-8 encoding.
More detailed information about the client is available in the section ["Command-line client"](../interfaces/cli.md).

View File

@ -1,5 +1,5 @@
# remote
# remote, remoteSecure
Allows you to access remote servers without creating a `Distributed` table.
@ -72,4 +72,6 @@ example01-{01..02}-{1|2}
If the user is not specified, `default` is used.
If the password is not specified, an empty password is used.
`remoteSecure` - same as `remote`, but over an encrypted connection. Default port - `tcp_port_secure` from the config, or 9440.
[Original article](https://clickhouse.yandex/docs/ru/query_language/table_functions/remote/) <!--hide-->

View File

@ -21,6 +21,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
add_subdirectory (corrector_utf8)
add_subdirectory (zookeeper-cli)
add_subdirectory (zookeeper-dump-tree)
add_subdirectory (zookeeper-copy-tree)
add_subdirectory (zookeeper-remove-by-list)
add_subdirectory (zookeeper-create-entry-to-download-part)
add_subdirectory (wikistat-loader)

View File

@ -0,0 +1,2 @@
add_executable (zookeeper-copy-tree main.cpp ${SRCS})
target_link_libraries(zookeeper-copy-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY})

View File

@ -0,0 +1,149 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/Exception.h>
#include <boost/program_options.hpp>
#include <iostream>
namespace DB
{
namespace ErrorCodes
{
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
}
}
int main(int argc, char ** argv)
try
{
boost::program_options::options_description desc("Allowed options");
desc.add_options()
("help,h", "produce help message")
("from", boost::program_options::value<std::string>()->required(),
"addresses of source ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181")
("from-path", boost::program_options::value<std::string>()->required(),
"where to copy from")
("to", boost::program_options::value<std::string>()->required(),
"addresses of destination ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181")
("to-path", boost::program_options::value<std::string>()->required(),
"where to copy to")
;
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
if (options.count("help"))
{
std::cout << "Copy a ZooKeeper tree to another cluster." << std::endl;
std::cout << "Usage: " << argv[0] << " [options]" << std::endl;
std::cout << "WARNING: it is almost useless as it is impossible to corretly copy sequential nodes" << std::endl;
std::cout << desc << std::endl;
return 1;
}
zkutil::ZooKeeper from_zookeeper(options.at("from").as<std::string>());
zkutil::ZooKeeper to_zookeeper(options.at("to").as<std::string>());
std::string from_path = options.at("from-path").as<std::string>();
std::string to_path = options.at("to-path").as<std::string>();
if (to_zookeeper.exists(to_path))
throw DB::Exception("Destination path: " + to_path + " already exists, aborting.",
DB::ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER);
struct Node
{
Node(
std::string path_,
std::future<Coordination::GetResponse> get_future_,
std::future<Coordination::ListResponse> children_future_,
Node * parent_)
: path(std::move(path_))
, get_future(std::move(get_future_))
, children_future(std::move(children_future_))
, parent(parent_)
{
}
std::string path;
std::future<Coordination::GetResponse> get_future;
std::future<Coordination::ListResponse> children_future;
Node * parent = nullptr;
std::future<Coordination::CreateResponse> create_future;
bool created = false;
bool deleted = false;
bool ephemeral = false;
};
std::list<Node> nodes_queue;
nodes_queue.emplace_back(
from_path, from_zookeeper.asyncGet(from_path), from_zookeeper.asyncGetChildren(from_path), nullptr);
to_zookeeper.createAncestors(to_path);
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
{
Coordination::GetResponse get_response;
Coordination::ListResponse children_response;
try
{
get_response = it->get_future.get();
children_response = it->children_future.get();
}
catch (const Coordination::Exception & e)
{
if (e.code == Coordination::ZNONODE)
{
it->deleted = true;
continue;
}
throw;
}
if (get_response.stat.ephemeralOwner)
{
it->ephemeral = true;
continue;
}
if (it->parent && !it->parent->created)
{
it->parent->create_future.get();
it->parent->created = true;
std::cerr << it->parent->path << " copied!" << std::endl;
}
std::string new_path = it->path;
new_path.replace(0, from_path.length(), to_path);
it->create_future = to_zookeeper.asyncCreate(new_path, get_response.data, zkutil::CreateMode::Persistent);
get_response.data.clear();
get_response.data.shrink_to_fit();
for (const auto & name : children_response.names)
{
std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name;
nodes_queue.emplace_back(
child_path, from_zookeeper.asyncGet(child_path), from_zookeeper.asyncGetChildren(child_path),
&(*it));
}
}
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
{
if (!it->created && !it->deleted && !it->ephemeral)
{
it->create_future.get();
it->created = true;
std::cerr << it->path << " copied!" << std::endl;
}
}
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
throw;
}

View File

@ -192,6 +192,8 @@
rel="external nofollow" target="_blank">Column Store Database Benchmarks</a> by Percona</li>
<li><a href="http://tech.marksblogg.com/billion-nyc-taxi-clickhouse.html"
rel="external nofollow" target="_blank">1.1 Billion Taxi Rides on ClickHouse & an Intel Core i5</a> by Mark Litwintschik</li>
<li><a href="https://tech.marksblogg.com/billion-nyc-taxi-rides-clickhouse-cluster.html"
rel="external nofollow" target="_blank">1.1 Billion Taxi Rides: 108-core ClickHouse Cluster</a> by Mark Litwintschik</li>
<li><a href="https://www.altinity.com/blog/2017/6/20/clickhouse-vs-redshift"
rel="external nofollow" target="_blank">ClickHouse vs Amazon RedShift Benchmark</a> by Altinity</li>
<li><a href="https://carto.com/blog/inside/geospatial-processing-with-clickhouse"