mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' of github.com:yandex/ClickHouse
This commit is contained in:
commit
d8b01bec80
@ -1,7 +1,7 @@
|
||||
# Use Ninja instead of Unix Makefiles by default.
|
||||
# https://stackoverflow.com/questions/11269833/cmake-selecting-a-generator-within-cmakelists-txt
|
||||
#
|
||||
# Reason: it have better startup time than make and it parallelize jobs more uniformly.
|
||||
# Reason: it has better startup time than make and it parallelizes jobs more uniformly.
|
||||
# (when comparing to make with Makefiles that were generated by CMake)
|
||||
#
|
||||
# How to install Ninja on Ubuntu:
|
||||
|
@ -2,10 +2,10 @@
|
||||
set(VERSION_REVISION 54413)
|
||||
set(VERSION_MAJOR 19)
|
||||
set(VERSION_MINOR 1)
|
||||
set(VERSION_PATCH 0)
|
||||
set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067)
|
||||
set(VERSION_DESCRIBE v19.1.0-testing)
|
||||
set(VERSION_STRING 19.1.0)
|
||||
set(VERSION_PATCH 1)
|
||||
set(VERSION_GITHASH 4e7747117123f5a1b027a64865844b4faa10447d)
|
||||
set(VERSION_DESCRIBE v19.1.1-testing)
|
||||
set(VERSION_STRING 19.1.1)
|
||||
# end of autochange
|
||||
|
||||
set(VERSION_EXTRA "" CACHE STRING "")
|
||||
|
@ -235,6 +235,11 @@ private:
|
||||
actions.clear();
|
||||
actions.emplace_back(PatternActionType::KleeneStar);
|
||||
|
||||
dfa_states.clear();
|
||||
dfa_states.emplace_back(true);
|
||||
|
||||
pattern_has_time = false;
|
||||
|
||||
const char * pos = pattern.data();
|
||||
const char * begin = pos;
|
||||
const char * end = pos + pattern.size();
|
||||
@ -285,6 +290,7 @@ private:
|
||||
actions.back().type != PatternActionType::KleeneStar)
|
||||
throw Exception{"Temporal condition should be preceeded by an event condition", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
pattern_has_time = true;
|
||||
actions.emplace_back(type, duration);
|
||||
}
|
||||
else
|
||||
@ -299,6 +305,9 @@ private:
|
||||
throw Exception{"Event number " + toString(event_number) + " is out of range", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1);
|
||||
dfa_states.back().transition = DFATransition::SpecificEvent;
|
||||
dfa_states.back().event = event_number - 1;
|
||||
dfa_states.emplace_back();
|
||||
}
|
||||
|
||||
if (!match(")"))
|
||||
@ -306,17 +315,88 @@ private:
|
||||
|
||||
}
|
||||
else if (match(".*"))
|
||||
{
|
||||
actions.emplace_back(PatternActionType::KleeneStar);
|
||||
dfa_states.back().has_kleene = true;
|
||||
}
|
||||
else if (match("."))
|
||||
{
|
||||
actions.emplace_back(PatternActionType::AnyEvent);
|
||||
dfa_states.back().transition = DFATransition::AnyEvent;
|
||||
dfa_states.emplace_back();
|
||||
}
|
||||
else
|
||||
throw_exception("Could not parse pattern, unexpected starting symbol");
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Uses a DFA based approach in order to better handle patterns without
|
||||
/// time assertions.
|
||||
///
|
||||
/// NOTE: This implementation relies on the assumption that the patterns are *small*.
|
||||
///
|
||||
/// This algorithm performs in O(mn) (with m the number of DFA states and n the number
|
||||
/// of events) with a memory consumption and memory allocations in O(m). It means that
|
||||
/// if n >>> m (which is expected to be the case), this algorithm can be considered linear.
|
||||
template <typename T>
|
||||
bool match(T & events_it, const T events_end) const
|
||||
bool dfaMatch(T & events_it, const T events_end) const
{
    using ActiveStates = std::vector<bool>;

    const size_t states_count = dfa_states.size();

    /// `current` marks the states that are alive before the event under `events_it` is consumed,
    /// `upcoming` collects the states that will be alive after it.
    ActiveStates current(states_count, false);
    ActiveStates upcoming(states_count, false);
    current[0] = true;

    /// Number of activations produced by the last processed event. Once it drops to zero
    /// no state can ever become active again, so the remaining events are not inspected.
    size_t alive = 1;

    /// Stop as soon as the last state is reached, the events run out or every path is dead.
    while (events_it != events_end && alive > 0 && !current.back())
    {
        alive = 0;
        upcoming.assign(states_count, false);

        for (size_t idx = 0; idx < states_count; ++idx)
        {
            if (!current[idx])
                continue;

            const auto & state = dfa_states[idx];

            /// Does the current event let us follow the outgoing edge of this state?
            /// The event is only tested for `SpecificEvent` transitions.
            const bool advances = state.transition == DFATransition::AnyEvent
                || (state.transition == DFATransition::SpecificEvent && events_it->second.test(state.event));

            if (advances)
            {
                upcoming[idx + 1] = true;
                ++alive;
            }

            /// A state with a self-loop stays active whatever the event is.
            if (state.has_kleene)
            {
                upcoming[idx] = true;
                ++alive;
            }
        }

        current.swap(upcoming);
        ++events_it;
    }

    return current.back();
}
|
||||
|
||||
template <typename T>
|
||||
bool backtrackingMatch(T & events_it, const T events_end) const
|
||||
{
|
||||
const auto action_begin = std::begin(actions);
|
||||
const auto action_end = std::end(actions);
|
||||
@ -445,10 +525,53 @@ protected:
|
||||
return action_it == action_end;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Kind of the edge that leads from a DFA state to the next one.
enum class DFATransition : char
{
    /// The state has no outgoing edge:
    ///
    ///   .-------.
    ///   |       |
    ///   `-------'
    None = 0,

    /// The edge is followed only by an event satisfying one given condition:
    ///
    ///   .-------.  (?[0-9])
    ///   |       | ----------
    ///   `-------'
    SpecificEvent = 1,

    /// The edge is followed by any event:
    ///
    ///   .-------.      .
    ///   |       | ----------
    ///   `-------'
    AnyEvent = 2,
};
|
||||
|
||||
struct DFAState
|
||||
{
|
||||
DFAState(bool has_kleene = false)
|
||||
: has_kleene{has_kleene}, event{0}, transition{DFATransition::None}
|
||||
{}
|
||||
|
||||
/// .-------.
|
||||
/// | | - - -
|
||||
/// `-------'
|
||||
/// |_^
|
||||
bool has_kleene;
|
||||
/// In the case of a state transitions with a `SpecificEvent`,
|
||||
/// `event` contains the value of the event.
|
||||
uint32_t event;
|
||||
/// The kind of transition out of this state.
|
||||
DFATransition transition;
|
||||
};
|
||||
|
||||
using DFAStates = std::vector<DFAState>;
|
||||
|
||||
protected:
|
||||
/// `True` if the parsed pattern contains time assertions (?t...), `false` otherwise.
|
||||
bool pattern_has_time;
|
||||
|
||||
private:
|
||||
std::string pattern;
|
||||
size_t arg_count;
|
||||
PatternActions actions;
|
||||
|
||||
DFAStates dfa_states;
|
||||
};
|
||||
|
||||
|
||||
@ -471,7 +594,8 @@ public:
|
||||
const auto events_end = std::end(data_ref.events_list);
|
||||
auto events_it = events_begin;
|
||||
|
||||
static_cast<ColumnUInt8 &>(to).getData().push_back(match(events_it, events_end));
|
||||
bool match = pattern_has_time ? backtrackingMatch(events_it, events_end) : dfaMatch(events_it, events_end);
|
||||
static_cast<ColumnUInt8 &>(to).getData().push_back(match);
|
||||
}
|
||||
};
|
||||
|
||||
@ -501,7 +625,7 @@ private:
|
||||
auto events_it = events_begin;
|
||||
|
||||
size_t count = 0;
|
||||
while (events_it != events_end && match(events_it, events_end))
|
||||
while (events_it != events_end && backtrackingMatch(events_it, events_end))
|
||||
++count;
|
||||
|
||||
return count;
|
||||
|
@ -1,15 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/StringSearcher.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
#include <Poco/Unicode.h>
|
||||
#include <Common/StringSearcher.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <common/unaligned.h>
|
||||
#include <ext/range.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/** Search for a substring in a string by Volnitsky's algorithm
|
||||
* http://volnitsky.com/project/str_search/
|
||||
@ -28,117 +30,38 @@
|
||||
* - if it did not match, we check the next cell of the hash table from the collision resolution chain;
|
||||
* - if not found, skip to haystack almost the size of the needle bytes;
|
||||
*
|
||||
* Unaligned memory access is used.
|
||||
* MultiVolnitsky - search for multiple substrings in a string:
|
||||
* - Add bigrams to hash table with string index. Then the usual Volnitsky search is used.
|
||||
* - We are adding while searching, limiting the number of fallback searchers and the total number of added bigrams
|
||||
*/
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
|
||||
template <typename CRTP>
|
||||
class VolnitskyBase
|
||||
namespace VolnitskyTraits
|
||||
{
|
||||
protected:
|
||||
using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
|
||||
using Id = UInt8; /// Index of the string (within the array of multiple needles), must not be greater than 255.
|
||||
using Ngram = UInt16; /// n-gram (2 bytes).
|
||||
|
||||
const UInt8 * const needle;
|
||||
const size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
/// For how long we move, if the n-gram from haystack is not found in the hash table.
|
||||
const size_t step = needle_size - sizeof(Ngram) + 1;
|
||||
|
||||
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
|
||||
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
|
||||
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache (of common Intel CPUs).
|
||||
Offset hash[hash_size]; /// Hash table.
|
||||
/** Fits into the L2 cache (of common Intel CPUs).
|
||||
* This number is extremely good for compilers as it is numeric_limits<UInt16>::max() + 1 and there are optimizations with movzwl and other instructions with 2 bytes
|
||||
*/
|
||||
static constexpr size_t hash_size = 64 * 1024;
|
||||
|
||||
/// min haystack size to use main algorithm instead of fallback
|
||||
static constexpr auto min_haystack_size_for_algorithm = 20000;
|
||||
const bool fallback; /// Do we need to use the fallback algorithm.
|
||||
static constexpr size_t min_haystack_size_for_algorithm = 20000;
|
||||
|
||||
public:
|
||||
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
|
||||
* If you specify it small enough, the fallback algorithm will be used,
|
||||
* since it is considered that it's useless to waste time initializing the hash table.
|
||||
*/
|
||||
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
|
||||
: needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
|
||||
fallback{
|
||||
needle_size < 2 * sizeof(Ngram)
|
||||
|| needle_size >= std::numeric_limits<Offset>::max()
|
||||
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
|
||||
static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
|
||||
{
|
||||
if (fallback)
|
||||
return;
|
||||
|
||||
memset(hash, 0, sizeof(hash));
|
||||
|
||||
/// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
|
||||
for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
|
||||
self().putNGram(this->needle + i, i + 1, this->needle);
|
||||
return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits<Offset>::max()
|
||||
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm);
|
||||
}
|
||||
|
||||
static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
|
||||
|
||||
/// If not found, the end of the haystack is returned.
|
||||
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
|
||||
{
|
||||
if (needle_size == 0)
|
||||
return haystack;
|
||||
|
||||
const auto haystack_end = haystack + haystack_size;
|
||||
|
||||
if (needle_size == 1 || fallback || haystack_size <= needle_size)
|
||||
return self().search_fallback(haystack, haystack_end);
|
||||
|
||||
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
|
||||
const auto * pos = haystack + needle_size - sizeof(Ngram);
|
||||
for (; pos <= haystack_end - needle_size; pos += step)
|
||||
{
|
||||
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
|
||||
for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
|
||||
cell_num = (cell_num + 1) % hash_size)
|
||||
{
|
||||
/// When found - compare bytewise, using the offset from the hash table.
|
||||
const auto res = pos - (hash[cell_num] - 1);
|
||||
|
||||
if (self().compare(res))
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
/// The remaining tail.
|
||||
return self().search_fallback(pos - step + 1, haystack_end);
|
||||
}
|
||||
|
||||
const char * search(const char * haystack, size_t haystack_size) const
|
||||
{
|
||||
return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
|
||||
}
|
||||
|
||||
protected:
|
||||
CRTP & self() { return static_cast<CRTP &>(*this); }
|
||||
const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }
|
||||
|
||||
static Ngram toNGram(const UInt8 * const pos)
|
||||
{
|
||||
return unalignedLoad<Ngram>(pos);
|
||||
}
|
||||
|
||||
void putNGramBase(const Ngram ngram, const int offset)
|
||||
{
|
||||
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
|
||||
size_t cell_num = ngram % hash_size;
|
||||
|
||||
while (hash[cell_num])
|
||||
cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.
|
||||
|
||||
hash[cell_num] = offset;
|
||||
}
|
||||
|
||||
void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset)
|
||||
template <typename Callback>
|
||||
static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
|
||||
{
|
||||
struct Chars
|
||||
{
|
||||
@ -186,74 +109,21 @@ protected:
|
||||
/// 1 combination: 01
|
||||
putNGramBase(n, offset);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl;
|
||||
|
||||
/// Case sensitive comparison
|
||||
template <bool ASCII> struct VolnitskyImpl<true, ASCII> : VolnitskyBase<VolnitskyImpl<true, ASCII>>
|
||||
{
|
||||
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
|
||||
: VolnitskyBase<VolnitskyImpl<true, ASCII>>{needle_, needle_size_, haystack_size_hint},
|
||||
fallback_searcher{needle_, needle_size_}
|
||||
template <bool CaseSensitive, bool ASCII, typename Callback>
|
||||
static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
|
||||
{
|
||||
}
|
||||
|
||||
void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
|
||||
if constexpr (CaseSensitive)
|
||||
{
|
||||
this->putNGramBase(this->toNGram(pos), offset);
|
||||
putNGramBase(toNGram(pos), offset);
|
||||
}
|
||||
|
||||
bool compare(const UInt8 * const pos) const
|
||||
else
|
||||
{
|
||||
/// @todo: maybe just use memcmp for this case and rely on internal SSE optimization as in case with memcpy?
|
||||
return fallback_searcher.compare(pos);
|
||||
}
|
||||
|
||||
const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
|
||||
if constexpr (ASCII)
|
||||
{
|
||||
return fallback_searcher.search(haystack, haystack_end);
|
||||
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
|
||||
}
|
||||
|
||||
ASCIICaseSensitiveStringSearcher fallback_searcher;
|
||||
};
|
||||
|
||||
/// Case-insensitive ASCII
|
||||
template <> struct VolnitskyImpl<false, true> : VolnitskyBase<VolnitskyImpl<false, true>>
|
||||
{
|
||||
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
|
||||
: VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
|
||||
{
|
||||
}
|
||||
|
||||
void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
|
||||
{
|
||||
putNGramASCIICaseInsensitive(pos, offset);
|
||||
}
|
||||
|
||||
bool compare(const UInt8 * const pos) const
|
||||
{
|
||||
return fallback_searcher.compare(pos);
|
||||
}
|
||||
|
||||
const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
|
||||
{
|
||||
return fallback_searcher.search(haystack, haystack_end);
|
||||
}
|
||||
|
||||
ASCIICaseInsensitiveStringSearcher fallback_searcher;
|
||||
};
|
||||
|
||||
/// Case-sensitive UTF-8
|
||||
template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<false, false>>
|
||||
{
|
||||
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
|
||||
: VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
|
||||
{
|
||||
}
|
||||
|
||||
void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const begin)
|
||||
else
|
||||
{
|
||||
struct Chars
|
||||
{
|
||||
@ -263,16 +133,14 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
|
||||
union
|
||||
{
|
||||
Ngram n;
|
||||
VolnitskyTraits::Ngram n;
|
||||
Chars chars;
|
||||
};
|
||||
|
||||
n = toNGram(pos);
|
||||
|
||||
if (isascii(chars.c0) && isascii(chars.c1))
|
||||
{
|
||||
putNGramASCIICaseInsensitive(pos, offset);
|
||||
}
|
||||
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
|
||||
else
|
||||
{
|
||||
/** n-gram (in the case of n = 2)
|
||||
@ -435,25 +303,389 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool compare(const UInt8 * const pos) const
|
||||
|
||||
/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
|
||||
template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
|
||||
class VolnitskyBase
|
||||
{
|
||||
protected:
|
||||
const UInt8 * const needle;
|
||||
const size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
/// For how long we move, if the n-gram from haystack is not found in the hash table.
|
||||
const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
|
||||
|
||||
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
|
||||
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
|
||||
VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.
|
||||
|
||||
const bool fallback; /// Do we need to use the fallback algorithm.
|
||||
|
||||
FallbackSearcher fallback_searcher;
|
||||
|
||||
public:
|
||||
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
|
||||
* If you specify it small enough, the fallback algorithm will be used,
|
||||
* since it is considered that it's useless to waste time initializing the hash table.
|
||||
*/
|
||||
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
|
||||
: needle{reinterpret_cast<const UInt8 *>(needle)}
|
||||
, needle_size{needle_size}
|
||||
, fallback{VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)}
|
||||
, fallback_searcher{needle, needle_size}
|
||||
{
|
||||
return fallback_searcher.compare(pos);
|
||||
if (fallback)
|
||||
return;
|
||||
|
||||
memset(hash, 0, sizeof(hash));
|
||||
|
||||
auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
|
||||
/// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
|
||||
for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
|
||||
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
|
||||
}
|
||||
|
||||
const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
|
||||
|
||||
/// If not found, the end of the haystack is returned.
|
||||
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
|
||||
{
|
||||
if (needle_size == 0)
|
||||
return haystack;
|
||||
|
||||
const auto haystack_end = haystack + haystack_size;
|
||||
|
||||
if (fallback || haystack_size <= needle_size)
|
||||
return fallback_searcher.search(haystack, haystack_end);
|
||||
|
||||
/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
|
||||
const auto * pos = haystack + needle_size - sizeof(VolnitskyTraits::Ngram);
|
||||
for (; pos <= haystack_end - needle_size; pos += step)
|
||||
{
|
||||
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
|
||||
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num];
|
||||
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
|
||||
{
|
||||
/// When found - compare bytewise, using the offset from the hash table.
|
||||
const auto res = pos - (hash[cell_num] - 1);
|
||||
|
||||
/// pointer in the code is always padded array so we can use pagesafe semantics
|
||||
if (fallback_searcher.compare(res))
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
UTF8CaseInsensitiveStringSearcher fallback_searcher;
|
||||
return fallback_searcher.search(pos - step + 1, haystack_end);
|
||||
}
|
||||
|
||||
const char * search(const char * haystack, size_t haystack_size) const
|
||||
{
|
||||
return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
|
||||
}
|
||||
|
||||
protected:
|
||||
void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset)
|
||||
{
|
||||
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
|
||||
size_t cell_num = ngram % VolnitskyTraits::hash_size;
|
||||
|
||||
while (hash[cell_num])
|
||||
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; /// Search for the next free cell.
|
||||
|
||||
hash[cell_num] = offset;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
using Volnitsky = VolnitskyImpl<true, true>;
|
||||
using VolnitskyUTF8 = VolnitskyImpl<true, false>; /// exactly same as Volnitsky
|
||||
using VolnitskyCaseInsensitive = VolnitskyImpl<false, true>; /// ignores non-ASCII bytes
|
||||
using VolnitskyCaseInsensitiveUTF8 = VolnitskyImpl<false, false>;
|
||||
template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
|
||||
class MultiVolnitskyBase
|
||||
{
|
||||
private:
|
||||
/// needles and their offsets
|
||||
const std::vector<StringRef> & needles;
|
||||
|
||||
|
||||
/// fallback searchers
|
||||
std::vector<size_t> fallback_needles;
|
||||
std::vector<FallbackSearcher> fallback_searchers;
|
||||
|
||||
/// because std::pair<> is not POD
|
||||
struct OffsetId
|
||||
{
|
||||
VolnitskyTraits::Id id;
|
||||
VolnitskyTraits::Offset off;
|
||||
};
|
||||
|
||||
OffsetId hash[VolnitskyTraits::hash_size];
|
||||
|
||||
/// step for each bunch of strings
|
||||
size_t step;
|
||||
|
||||
/// last index of offsets that was not processed
|
||||
size_t last;
|
||||
|
||||
/// limit for adding to hashtable. In worst case with case insentive search, the table will be filled at most as half
|
||||
static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8;
|
||||
|
||||
public:
|
||||
MultiVolnitskyBase(const std::vector<StringRef> & needles_) : needles{needles_}, step{0}, last{0}
|
||||
{
|
||||
fallback_searchers.reserve(needles.size());
|
||||
}
|
||||
|
||||
template <typename ResultType, typename AnsCallback>
|
||||
void searchAll(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const AnsCallback & ansCallback,
|
||||
ResultType & ans)
|
||||
{
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
const size_t needles_size = needles.size();
|
||||
|
||||
/// something can be uninitialized after
|
||||
std::fill(ans.begin(), ans.end(), 0);
|
||||
|
||||
while (!reset())
|
||||
{
|
||||
size_t fallback_size = fallback_needles.size();
|
||||
size_t prev_offset = 0;
|
||||
for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size)
|
||||
{
|
||||
const auto * haystack = &haystack_data[prev_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
|
||||
for (size_t i = 0; i < fallback_size; ++i)
|
||||
{
|
||||
const UInt8 * ptr = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end);
|
||||
if (ptr != haystack_end)
|
||||
ans[from + fallback_needles[i]] = ansCallback(haystack, ptr);
|
||||
}
|
||||
|
||||
/// check if we have one non empty volnitsky searcher
|
||||
if (step != std::numeric_limits<size_t>::max())
|
||||
{
|
||||
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
|
||||
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
|
||||
{
|
||||
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
|
||||
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
|
||||
{
|
||||
if (pos >= haystack + hash[cell_num].off - 1)
|
||||
{
|
||||
const auto * res = pos - (hash[cell_num].off - 1);
|
||||
const size_t ind = hash[cell_num].id;
|
||||
if (ans[from + ind] == 0 && res + needles[ind].size <= haystack_end)
|
||||
{
|
||||
if (fallback_searchers[ind].compare(res))
|
||||
{
|
||||
ans[from + ind] = ansCallback(haystack, res);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
prev_offset = haystack_offsets[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ResultType>
|
||||
void search(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans)
|
||||
{
|
||||
auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> bool
|
||||
{
|
||||
return this->searchOne(haystack, haystack_end);
|
||||
};
|
||||
searchInternal(haystack_data, haystack_offsets, callback, ans);
|
||||
}
|
||||
|
||||
template <typename ResultType>
|
||||
void searchIndex(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans)
|
||||
{
|
||||
auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t
|
||||
{
|
||||
return this->searchOneIndex(haystack, haystack_end);
|
||||
};
|
||||
searchInternal(haystack_data, haystack_offsets, callback, ans);
|
||||
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* This function is needed to initialize hash table
|
||||
* Returns `true` if there is nothing to initialize
|
||||
* and `false` if we have something to initialize and initializes it.
|
||||
* This function is a kind of fallback if there are many needles.
|
||||
* We actually destroy the hash table and initialize it with uninitialized needles
|
||||
* and search through the haystack again.
|
||||
* The actual usage of this function is like this:
|
||||
* while (!reset())
|
||||
* {
|
||||
* search inside the haystack with the known needles
|
||||
* }
|
||||
*/
|
||||
bool reset()
|
||||
{
|
||||
if (last == needles.size())
|
||||
return true;
|
||||
|
||||
memset(hash, 0, sizeof(hash));
|
||||
fallback_needles.clear();
|
||||
step = std::numeric_limits<size_t>::max();
|
||||
|
||||
size_t buf = 0;
|
||||
size_t size = needles.size();
|
||||
|
||||
for (; last < size; ++last)
|
||||
{
|
||||
const char * cur_needle_data = needles[last].data;
|
||||
const size_t cur_needle_size = needles[last].size;
|
||||
|
||||
/// save the indices of fallback searchers
|
||||
if (VolnitskyTraits::isFallbackNeedle(cur_needle_size))
|
||||
{
|
||||
fallback_needles.push_back(last);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// put all bigrams
|
||||
auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset)
|
||||
{
|
||||
return this->putNGramBase(ngram, offset, this->last);
|
||||
};
|
||||
|
||||
buf += cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
|
||||
|
||||
/// this is the condition when we actually need to stop and start searching with known needles
|
||||
if (buf > small_limit)
|
||||
break;
|
||||
|
||||
step = std::min(step, cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1);
|
||||
for (auto i = static_cast<int>(cur_needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
|
||||
{
|
||||
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(
|
||||
reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
|
||||
i + 1,
|
||||
reinterpret_cast<const UInt8 *>(cur_needle_data),
|
||||
callback);
|
||||
}
|
||||
}
|
||||
fallback_searchers.emplace_back(cur_needle_data, cur_needle_size);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename OneSearcher, typename ResultType>
|
||||
inline void searchInternal(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const OneSearcher & searchFallback,
|
||||
ResultType & ans)
|
||||
{
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
while (!reset())
|
||||
{
|
||||
size_t prev_offset = 0;
|
||||
for (size_t j = 0; j < haystack_string_size; ++j)
|
||||
{
|
||||
const auto * haystack = &haystack_data[prev_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
|
||||
ans[j] = searchFallback(haystack, haystack_end);
|
||||
prev_offset = haystack_offsets[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const
|
||||
{
|
||||
const size_t fallback_size = fallback_needles.size();
|
||||
for (size_t i = 0; i < fallback_size; ++i)
|
||||
if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
|
||||
return true;
|
||||
|
||||
/// check if we have one non empty volnitsky searcher
|
||||
if (step != std::numeric_limits<size_t>::max())
|
||||
{
|
||||
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
|
||||
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
|
||||
{
|
||||
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
|
||||
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
|
||||
{
|
||||
if (pos >= haystack + hash[cell_num].off - 1)
|
||||
{
|
||||
const auto res = pos - (hash[cell_num].off - 1);
|
||||
const size_t ind = hash[cell_num].id;
|
||||
if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline size_t searchOneIndex(const UInt8 * haystack, const UInt8 * haystack_end) const
|
||||
{
|
||||
const size_t fallback_size = fallback_needles.size();
|
||||
|
||||
size_t ans = std::numeric_limits<size_t>::max();
|
||||
|
||||
for (size_t i = 0; i < fallback_size; ++i)
|
||||
if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
|
||||
ans = std::min(ans, fallback_needles[i]);
|
||||
|
||||
/// check if we have one non empty volnitsky searcher
|
||||
if (step != std::numeric_limits<size_t>::max())
|
||||
{
|
||||
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
|
||||
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
|
||||
{
|
||||
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
|
||||
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
|
||||
{
|
||||
if (pos >= haystack + hash[cell_num].off - 1)
|
||||
{
|
||||
const auto res = pos - (hash[cell_num].off - 1);
|
||||
const size_t ind = hash[cell_num].id;
|
||||
if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
|
||||
ans = std::min(ans, ind);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* if nothing was found, ans + 1 will be equal to zero and we can
|
||||
* assign it into the result because we need to return the position starting with one
|
||||
*/
|
||||
return ans + 1;
|
||||
}
|
||||
|
||||
/** Stores the pair (needle number, offset) for `ngram` in the open-addressing table `hash`.
  *
  * Probing starts at `ngram % hash_size` and moves forward one cell at a time, wrapping around,
  * until a cell with zero `off` (treated as empty) is met.
  * The probe ends only on such a cell, so the table must contain at least one.
  */
void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num)
{
    size_t slot = ngram % VolnitskyTraits::hash_size;
    for (; hash[slot].off; slot = (slot + 1) % VolnitskyTraits::hash_size)
    {
        /// Occupied cell, keep probing.
    }

    hash[slot] = {static_cast<VolnitskyTraits::Id>(num), static_cast<VolnitskyTraits::Offset>(offset)};
}
|
||||
};
|
||||
|
||||
|
||||
using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
|
||||
using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
|
||||
using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
|
||||
using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
|
||||
|
||||
using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
|
||||
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
|
||||
using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
|
||||
using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
|
||||
|
||||
|
||||
}
|
||||
|
@ -61,6 +61,7 @@ struct Request
|
||||
{
|
||||
Request() = default;
|
||||
Request(const Request &) = default;
|
||||
Request & operator=(const Request &) = default;
|
||||
virtual ~Request() = default;
|
||||
virtual String getPath() const = 0;
|
||||
virtual void addRootPath(const String & /* root_path */) {}
|
||||
@ -76,6 +77,7 @@ struct Response
|
||||
int32_t error = 0;
|
||||
Response() = default;
|
||||
Response(const Response &) = default;
|
||||
Response & operator=(const Response &) = default;
|
||||
virtual ~Response() = default;
|
||||
virtual void removeRootPath(const String & /* root_path */) {}
|
||||
};
|
||||
|
@ -23,6 +23,7 @@ namespace ErrorCodes
|
||||
extern const int BAD_TYPE_OF_FIELD;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
static String getSchemaPath(const String & schema_dir, const String & schema_file)
|
||||
@ -39,7 +40,7 @@ CapnProtoRowInputStream::NestedField split(const Block & header, size_t i)
|
||||
if (name.size() > 0 && name[0] == '.')
|
||||
name.erase(0, 1);
|
||||
|
||||
boost::split(field.tokens, name, boost::is_any_of("."));
|
||||
boost::split(field.tokens, name, boost::is_any_of("._"));
|
||||
return field;
|
||||
}
|
||||
|
||||
@ -109,44 +110,62 @@ capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::
|
||||
throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN);
|
||||
}
|
||||
|
||||
void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader)
|
||||
{
|
||||
String last;
|
||||
size_t level = 0;
|
||||
capnp::StructSchema::Field parent;
|
||||
|
||||
for (const auto & field : sortedFields)
|
||||
void CapnProtoRowInputStream::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader)
|
||||
{
|
||||
/// Columns in a table can map to fields in Cap'n'Proto or to structs.
|
||||
|
||||
/// Store common parents and their tokens in order to backtrack.
|
||||
std::vector<capnp::StructSchema::Field> parents;
|
||||
std::vector<std::string> parent_tokens;
|
||||
|
||||
capnp::StructSchema cur_reader = reader;
|
||||
|
||||
for (const auto & field : sorted_fields)
|
||||
{
|
||||
// Move to a different field in the same structure, keep parent
|
||||
if (level > 0 && field.tokens[level - 1] != last)
|
||||
if (field.tokens.empty())
|
||||
throw Exception("Logical error in CapnProtoRowInputStream", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
// Backtrack to common parent
|
||||
while (field.tokens.size() < parent_tokens.size() + 1
|
||||
|| !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin()))
|
||||
{
|
||||
auto child = getFieldOrThrow(parent.getContainingStruct(), field.tokens[level - 1]);
|
||||
reader = child.getType().asStruct();
|
||||
actions.push_back({Action::POP});
|
||||
actions.push_back({Action::PUSH, child});
|
||||
}
|
||||
// Descend to a nested structure
|
||||
for (; level < field.tokens.size() - 1; ++level)
|
||||
parents.pop_back();
|
||||
parent_tokens.pop_back();
|
||||
|
||||
if (parents.empty())
|
||||
{
|
||||
auto node = getFieldOrThrow(reader, field.tokens[level]);
|
||||
cur_reader = reader;
|
||||
break;
|
||||
}
|
||||
else
|
||||
cur_reader = parents.back().getType().asStruct();
|
||||
}
|
||||
|
||||
// Go forward
|
||||
while (parent_tokens.size() + 1 < field.tokens.size())
|
||||
{
|
||||
const auto & token = field.tokens[parents.size()];
|
||||
auto node = getFieldOrThrow(cur_reader, token);
|
||||
if (node.getType().isStruct())
|
||||
{
|
||||
// Descend to field structure
|
||||
last = field.tokens[level];
|
||||
parent = node;
|
||||
reader = parent.getType().asStruct();
|
||||
actions.push_back({Action::PUSH, parent});
|
||||
parents.emplace_back(node);
|
||||
parent_tokens.emplace_back(token);
|
||||
cur_reader = node.getType().asStruct();
|
||||
actions.push_back({Action::PUSH, node});
|
||||
}
|
||||
else if (node.getType().isList())
|
||||
{
|
||||
break; // Collect list
|
||||
}
|
||||
else
|
||||
throw Exception("Field " + field.tokens[level] + "is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD);
|
||||
throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD);
|
||||
}
|
||||
|
||||
// Read field from the structure
|
||||
auto node = getFieldOrThrow(reader, field.tokens[level]);
|
||||
auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]);
|
||||
if (node.getType().isList() && actions.size() > 0 && actions.back().field == node)
|
||||
{
|
||||
// The field list here flattens Nested elements into multiple arrays
|
||||
@ -168,7 +187,6 @@ void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields
|
||||
CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block & header_, const String & schema_dir, const String & schema_file, const String & root_object)
|
||||
: istr(istr_), header(header_), parser(std::make_shared<SchemaParser>())
|
||||
{
|
||||
|
||||
// Parse the schema and fetch the root object
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
@ -188,14 +206,8 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
list.push_back(split(header, i));
|
||||
|
||||
// Reorder list to make sure we don't have to backtrack
|
||||
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b)
|
||||
{
|
||||
if (a.tokens.size() == b.tokens.size())
|
||||
return a.tokens < b.tokens;
|
||||
return a.tokens.size() < b.tokens.size();
|
||||
});
|
||||
|
||||
// Order list first by value of strings then by length of string vector.
|
||||
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; });
|
||||
createActions(list, root);
|
||||
}
|
||||
|
||||
|
@ -1,28 +1,28 @@
|
||||
#include <Functions/FunctionsStringSearch.h>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <Poco/UTF8String.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Common/Volnitsky.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/Regexps.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/config.h>
|
||||
#include <re2/re2.h>
|
||||
#include <re2/stringpiece.h>
|
||||
#include <Poco/UTF8String.h>
|
||||
#include <Common/Volnitsky.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#if USE_RE2_ST
|
||||
#include <re2_st/re2.h> // Y_IGNORE
|
||||
# include <re2_st/re2.h> // Y_IGNORE
|
||||
#else
|
||||
#define re2_st re2
|
||||
# define re2_st re2
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
@ -35,7 +35,10 @@ namespace ErrorCodes
|
||||
struct PositionCaseSensitiveASCII
|
||||
{
|
||||
/// For searching single substring inside big-enough contiguous chunk of data. Could have slightly expensive initialization.
|
||||
using SearcherInBigHaystack = VolnitskyImpl<true, true>;
|
||||
using SearcherInBigHaystack = Volnitsky;
|
||||
|
||||
/// For searching many substrings in one string
|
||||
using MultiSearcherInBigHaystack = MultiVolnitsky;
|
||||
|
||||
/// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization.
|
||||
using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
|
||||
@ -50,23 +53,24 @@ struct PositionCaseSensitiveASCII
|
||||
return SearcherInSmallHaystack(needle_data, needle_size);
|
||||
}
|
||||
|
||||
/// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
|
||||
static size_t countChars(const char * begin, const char * end)
|
||||
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
|
||||
{
|
||||
return end - begin;
|
||||
return MultiSearcherInBigHaystack(needles);
|
||||
}
|
||||
|
||||
/// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
|
||||
static size_t countChars(const char * begin, const char * end) { return end - begin; }
|
||||
|
||||
/// Convert string to lowercase. Only for case-insensitive search.
|
||||
/// Implementation is permitted to be inefficient because it is called for single string.
|
||||
static void toLowerIfNeed(std::string &)
|
||||
{
|
||||
}
|
||||
static void toLowerIfNeed(std::string &) {}
|
||||
};
|
||||
|
||||
struct PositionCaseInsensitiveASCII
|
||||
{
|
||||
/// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it.
|
||||
using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher;
|
||||
using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive;
|
||||
using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher;
|
||||
|
||||
static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/)
|
||||
@ -79,20 +83,20 @@ struct PositionCaseInsensitiveASCII
|
||||
return SearcherInSmallHaystack(needle_data, needle_size);
|
||||
}
|
||||
|
||||
static size_t countChars(const char * begin, const char * end)
|
||||
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
|
||||
{
|
||||
return end - begin;
|
||||
return MultiSearcherInBigHaystack(needles);
|
||||
}
|
||||
|
||||
static void toLowerIfNeed(std::string & s)
|
||||
{
|
||||
std::transform(std::begin(s), std::end(s), std::begin(s), tolower);
|
||||
}
|
||||
static size_t countChars(const char * begin, const char * end) { return end - begin; }
|
||||
|
||||
static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
|
||||
};
|
||||
|
||||
struct PositionCaseSensitiveUTF8
|
||||
{
|
||||
using SearcherInBigHaystack = VolnitskyImpl<true, false>;
|
||||
using SearcherInBigHaystack = VolnitskyUTF8;
|
||||
using MultiSearcherInBigHaystack = MultiVolnitskyUTF8;
|
||||
using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
|
||||
|
||||
static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
|
||||
@ -105,6 +109,11 @@ struct PositionCaseSensitiveUTF8
|
||||
return SearcherInSmallHaystack(needle_data, needle_size);
|
||||
}
|
||||
|
||||
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
|
||||
{
|
||||
return MultiSearcherInBigHaystack(needles);
|
||||
}
|
||||
|
||||
static size_t countChars(const char * begin, const char * end)
|
||||
{
|
||||
size_t res = 0;
|
||||
@ -114,14 +123,13 @@ struct PositionCaseSensitiveUTF8
|
||||
return res;
|
||||
}
|
||||
|
||||
static void toLowerIfNeed(std::string &)
|
||||
{
|
||||
}
|
||||
static void toLowerIfNeed(std::string &) {}
|
||||
};
|
||||
|
||||
struct PositionCaseInsensitiveUTF8
|
||||
{
|
||||
using SearcherInBigHaystack = VolnitskyImpl<false, false>;
|
||||
using SearcherInBigHaystack = VolnitskyCaseInsensitiveUTF8;
|
||||
using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitiveUTF8;
|
||||
using SearcherInSmallHaystack = UTF8CaseInsensitiveStringSearcher; /// TODO Very suboptimal.
|
||||
|
||||
static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
|
||||
@ -134,6 +142,11 @@ struct PositionCaseInsensitiveUTF8
|
||||
return SearcherInSmallHaystack(needle_data, needle_size);
|
||||
}
|
||||
|
||||
static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
|
||||
{
|
||||
return MultiSearcherInBigHaystack(needles);
|
||||
}
|
||||
|
||||
static size_t countChars(const char * begin, const char * end)
|
||||
{
|
||||
size_t res = 0;
|
||||
@ -143,10 +156,7 @@ struct PositionCaseInsensitiveUTF8
|
||||
return res;
|
||||
}
|
||||
|
||||
static void toLowerIfNeed(std::string & s)
|
||||
{
|
||||
Poco::UTF8::toLowerInPlace(s);
|
||||
}
|
||||
static void toLowerIfNeed(std::string & s) { Poco::UTF8::toLowerInPlace(s); }
|
||||
};
|
||||
|
||||
template <typename Impl>
|
||||
@ -155,10 +165,8 @@ struct PositionImpl
|
||||
using ResultType = UInt64;
|
||||
|
||||
/// Find one substring in many strings.
|
||||
static void vector_constant(const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & needle,
|
||||
PaddedPODArray<UInt64> & res)
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray<UInt64> & res)
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
const UInt8 * pos = begin;
|
||||
@ -210,7 +218,8 @@ struct PositionImpl
|
||||
}
|
||||
|
||||
/// Search each time for a different single substring inside each time different string.
|
||||
static void vector_vector(const ColumnString::Chars & haystack_data,
|
||||
static void vector_vector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
@ -234,8 +243,8 @@ struct PositionImpl
|
||||
else
|
||||
{
|
||||
/// It is assumed that the StringSearcher is not very difficult to initialize.
|
||||
typename Impl::SearcherInSmallHaystack searcher
|
||||
= Impl::createSearcherInSmallHaystack(reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
|
||||
typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
|
||||
reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
|
||||
needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end
|
||||
|
||||
/// searcher returns a pointer to the found substring or to the end of `haystack`.
|
||||
@ -244,7 +253,9 @@ struct PositionImpl
|
||||
|
||||
if (pos != haystack_size)
|
||||
{
|
||||
res[i] = 1 + Impl::countChars(reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
|
||||
res[i] = 1
|
||||
+ Impl::countChars(
|
||||
reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
|
||||
reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset + pos]));
|
||||
}
|
||||
else
|
||||
@ -256,8 +267,9 @@ struct PositionImpl
|
||||
}
|
||||
}
|
||||
|
||||
/// Find many substrings in one line.
|
||||
static void constant_vector(const String & haystack,
|
||||
/// Find many substrings in single string.
|
||||
static void constant_vector(
|
||||
const String & haystack,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
PaddedPODArray<UInt64> & res)
|
||||
@ -281,7 +293,8 @@ struct PositionImpl
|
||||
typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
|
||||
reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_offsets[i] - prev_needle_offset - 1);
|
||||
|
||||
size_t pos = searcher.search(reinterpret_cast<const UInt8 *>(haystack.data()),
|
||||
size_t pos = searcher.search(
|
||||
reinterpret_cast<const UInt8 *>(haystack.data()),
|
||||
reinterpret_cast<const UInt8 *>(haystack.data()) + haystack.size())
|
||||
- reinterpret_cast<const UInt8 *>(haystack.data());
|
||||
|
||||
@ -298,6 +311,56 @@ struct PositionImpl
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Impl>
|
||||
struct MultiPositionImpl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const std::vector<StringRef> & needles,
|
||||
PaddedPODArray<UInt64> & res)
|
||||
{
|
||||
auto resCallback = [](const UInt8 * start, const UInt8 * end) -> UInt64
|
||||
{
|
||||
return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end));
|
||||
};
|
||||
|
||||
Impl::createMultiSearcherInBigHaystack(needles).searchAll(haystack_data, haystack_offsets, resCallback, res);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Impl>
|
||||
struct MultiSearchImpl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const std::vector<StringRef> & needles,
|
||||
PaddedPODArray<UInt64> & res)
|
||||
{
|
||||
Impl::createMultiSearcherInBigHaystack(needles).search(haystack_data, haystack_offsets, res);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Impl>
|
||||
struct FirstMatchImpl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const std::vector<StringRef> & needles,
|
||||
PaddedPODArray<UInt64> & res)
|
||||
{
|
||||
Impl::createMultiSearcherInBigHaystack(needles).searchIndex(haystack_data, haystack_offsets, res);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Is the LIKE expression reduced to finding a substring in a string?
|
||||
inline bool likePatternIsStrstr(const String & pattern, String & res)
|
||||
@ -348,10 +411,8 @@ struct MatchImpl
|
||||
{
|
||||
using ResultType = UInt8;
|
||||
|
||||
static void vector_constant(const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & pattern,
|
||||
PaddedPODArray<UInt8> & res)
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
if (offsets.empty())
|
||||
return;
|
||||
@ -467,13 +528,14 @@ struct MatchImpl
|
||||
size_t str_size = (i != 0 ? offsets[i] - offsets[i - 1] : offsets[0]) - 1;
|
||||
|
||||
/** Even in the case of `required_substring_is_prefix` use UNANCHORED check for regexp,
|
||||
* so that it can match when `required_substring` occurs into the line several times,
|
||||
* so that it can match when `required_substring` occurs in the string several times,
|
||||
* and at the first occurrence, the regexp is not a match.
|
||||
*/
|
||||
|
||||
if (required_substring_is_prefix)
|
||||
res[i] = revert
|
||||
^ regexp->getRE2()->Match(re2_st::StringPiece(str_data, str_size),
|
||||
^ regexp->getRE2()->Match(
|
||||
re2_st::StringPiece(str_data, str_size),
|
||||
reinterpret_cast<const char *>(pos) - str_data,
|
||||
str_size,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
@ -504,13 +566,15 @@ struct MatchImpl
|
||||
res = revert ^ regexp->match(data);
|
||||
}
|
||||
|
||||
template <typename... Args> static void vector_vector(Args &&...)
|
||||
template <typename... Args>
|
||||
static void vector_vector(Args &&...)
|
||||
{
|
||||
throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
/// Search different needles in single haystack.
|
||||
template <typename... Args> static void constant_vector(Args &&...)
|
||||
template <typename... Args>
|
||||
static void constant_vector(Args &&...)
|
||||
{
|
||||
throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
@ -519,7 +583,8 @@ struct MatchImpl
|
||||
|
||||
struct ExtractImpl
|
||||
{
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & pattern,
|
||||
ColumnString::Chars & res_data,
|
||||
@ -613,16 +678,17 @@ struct ReplaceRegexpImpl
|
||||
|
||||
for (const auto & it : instructions)
|
||||
if (it.first >= num_captures)
|
||||
throw Exception("Invalid replace instruction in replacement string. Id: " + toString(it.first) + ", but regexp has only "
|
||||
+ toString(num_captures - 1)
|
||||
+ " subpatterns",
|
||||
throw Exception(
|
||||
"Invalid replace instruction in replacement string. Id: " + toString(it.first) + ", but regexp has only "
|
||||
+ toString(num_captures - 1) + " subpatterns",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return instructions;
|
||||
}
|
||||
|
||||
|
||||
static void processString(const re2_st::StringPiece & input,
|
||||
static void processString(
|
||||
const re2_st::StringPiece & input,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offset & res_offset,
|
||||
re2_st::RE2 & searcher,
|
||||
@ -687,7 +753,8 @@ struct ReplaceRegexpImpl
|
||||
}
|
||||
|
||||
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
@ -715,7 +782,8 @@ struct ReplaceRegexpImpl
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_fixed(const ColumnString::Chars & data,
|
||||
static void vector_fixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
@ -749,7 +817,8 @@ struct ReplaceRegexpImpl
|
||||
template <bool replace_one = false>
|
||||
struct ReplaceStringImpl
|
||||
{
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
@ -791,7 +860,7 @@ struct ReplaceStringImpl
|
||||
if (i == offsets.size())
|
||||
break;
|
||||
|
||||
/// Is it true that this line no longer needs to perform transformations.
|
||||
/// Is it true that this string no longer needs to perform transformations.
|
||||
bool can_finish_current_string = false;
|
||||
|
||||
/// We check that the entry does not go through the boundaries of strings.
|
||||
@ -824,7 +893,8 @@ struct ReplaceStringImpl
|
||||
|
||||
/// Note: this function converts fixed-length strings to variable-length strings
|
||||
/// and each variable-length string should end with a zero byte.
|
||||
static void vector_fixed(const ColumnString::Chars & data,
|
||||
static void vector_fixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
@ -851,7 +921,8 @@ struct ReplaceStringImpl
|
||||
const UInt8 * match = searcher.search(pos, end - pos);
|
||||
|
||||
#define COPY_REST_OF_CURRENT_STRING() \
|
||||
do { \
|
||||
do \
|
||||
{ \
|
||||
const size_t len = begin + n * (i + 1) - pos; \
|
||||
res_data.resize(res_data.size() + len + 1); \
|
||||
memcpy(&res_data[res_offset], pos, len); \
|
||||
@ -878,7 +949,7 @@ struct ReplaceStringImpl
|
||||
memcpy(&res_data[res_offset], pos, match - pos);
|
||||
res_offset += (match - pos);
|
||||
|
||||
/// Is it true that this line no longer needs to perform conversions.
|
||||
/// Is it true that this string no longer needs to perform conversions.
|
||||
bool can_finish_current_string = false;
|
||||
|
||||
/// We check that the entry does not pass through the boundaries of strings.
|
||||
@ -935,20 +1006,11 @@ class FunctionStringReplace : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionStringReplace>();
|
||||
}
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionStringReplace>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
size_t getNumberOfArguments() const override { return 3; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
|
||||
@ -956,15 +1018,18 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isStringOrFixedString(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (!isStringOrFixedString(arguments[1]))
|
||||
throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (!isStringOrFixedString(arguments[2]))
|
||||
throw Exception("Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
@ -1025,6 +1090,54 @@ struct NamePositionCaseInsensitiveUTF8
|
||||
{
|
||||
static constexpr auto name = "positionCaseInsensitiveUTF8";
|
||||
};
|
||||
struct NameMultiPosition
|
||||
{
|
||||
static constexpr auto name = "multiPosition";
|
||||
};
|
||||
struct NameMultiPositionUTF8
|
||||
{
|
||||
static constexpr auto name = "multiPositionUTF8";
|
||||
};
|
||||
struct NameMultiPositionCaseInsensitive
|
||||
{
|
||||
static constexpr auto name = "multiPositionCaseInsensitive";
|
||||
};
|
||||
struct NameMultiPositionCaseInsensitiveUTF8
|
||||
{
|
||||
static constexpr auto name = "multiPositionCaseInsensitiveUTF8";
|
||||
};
|
||||
struct NameMultiSearch
|
||||
{
|
||||
static constexpr auto name = "multiSearch";
|
||||
};
|
||||
struct NameMultiSearchUTF8
|
||||
{
|
||||
static constexpr auto name = "multiSearchUTF8";
|
||||
};
|
||||
struct NameMultiSearchCaseInsensitive
|
||||
{
|
||||
static constexpr auto name = "multiSearchCaseInsensitive";
|
||||
};
|
||||
struct NameMultiSearchCaseInsensitiveUTF8
|
||||
{
|
||||
static constexpr auto name = "multiSearchCaseInsensitiveUTF8";
|
||||
};
|
||||
struct NameFirstMatch
|
||||
{
|
||||
static constexpr auto name = "firstMatch";
|
||||
};
|
||||
struct NameFirstMatchUTF8
|
||||
{
|
||||
static constexpr auto name = "firstMatchUTF8";
|
||||
};
|
||||
struct NameFirstMatchCaseInsensitive
|
||||
{
|
||||
static constexpr auto name = "firstMatchCaseInsensitive";
|
||||
};
|
||||
struct NameFirstMatchCaseInsensitiveUTF8
|
||||
{
|
||||
static constexpr auto name = "firstMatchCaseInsensitiveUTF8";
|
||||
};
|
||||
struct NameMatch
|
||||
{
|
||||
static constexpr auto name = "match";
|
||||
@ -1064,6 +1177,27 @@ using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<Posit
|
||||
using FunctionPositionCaseInsensitiveUTF8
|
||||
= FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMultiPosition = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseSensitiveASCII>, NameMultiPosition>;
|
||||
using FunctionMultiPositionUTF8 = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseSensitiveUTF8>, NameMultiPositionUTF8>;
|
||||
using FunctionMultiPositionCaseInsensitive
|
||||
= FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseInsensitiveASCII>, NameMultiPositionCaseInsensitive>;
|
||||
using FunctionMultiPositionCaseInsensitiveUTF8
|
||||
= FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseInsensitiveUTF8>, NameMultiPositionCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearch>;
|
||||
using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchUTF8>;
|
||||
using FunctionMultiSearchCaseInsensitive
|
||||
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchCaseInsensitive>;
|
||||
using FunctionMultiSearchCaseInsensitiveUTF8
|
||||
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionFirstMatch = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseSensitiveASCII>, NameFirstMatch>;
|
||||
using FunctionFirstMatchUTF8 = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseSensitiveUTF8>, NameFirstMatchUTF8>;
|
||||
using FunctionFirstMatchCaseInsensitive
|
||||
= FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseInsensitiveASCII>, NameFirstMatchCaseInsensitive>;
|
||||
using FunctionFirstMatchCaseInsensitiveUTF8
|
||||
= FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseInsensitiveUTF8>, NameFirstMatchCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMatch = FunctionsStringSearch<MatchImpl<false>, NameMatch>;
|
||||
using FunctionLike = FunctionsStringSearch<MatchImpl<true>, NameLike>;
|
||||
using FunctionNotLike = FunctionsStringSearch<MatchImpl<true, true>, NameNotLike>;
|
||||
@ -1080,14 +1214,32 @@ void registerFunctionsStringSearch(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionReplaceAll>();
|
||||
factory.registerFunction<FunctionReplaceRegexpOne>();
|
||||
factory.registerFunction<FunctionReplaceRegexpAll>();
|
||||
|
||||
factory.registerFunction<FunctionPosition>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionPositionUTF8>();
|
||||
factory.registerFunction<FunctionPositionCaseInsensitive>();
|
||||
factory.registerFunction<FunctionPositionCaseInsensitiveUTF8>();
|
||||
|
||||
factory.registerFunction<FunctionMultiPosition>();
|
||||
factory.registerFunction<FunctionMultiPositionUTF8>();
|
||||
factory.registerFunction<FunctionMultiPositionCaseInsensitive>();
|
||||
factory.registerFunction<FunctionMultiPositionCaseInsensitiveUTF8>();
|
||||
|
||||
factory.registerFunction<FunctionMultiSearch>();
|
||||
factory.registerFunction<FunctionMultiSearchUTF8>();
|
||||
factory.registerFunction<FunctionMultiSearchCaseInsensitive>();
|
||||
factory.registerFunction<FunctionMultiSearchCaseInsensitiveUTF8>();
|
||||
|
||||
factory.registerFunction<FunctionFirstMatch>();
|
||||
factory.registerFunction<FunctionFirstMatchUTF8>();
|
||||
factory.registerFunction<FunctionFirstMatchCaseInsensitive>();
|
||||
factory.registerFunction<FunctionFirstMatchCaseInsensitiveUTF8>();
|
||||
|
||||
factory.registerFunction<FunctionMatch>();
|
||||
factory.registerFunction<FunctionLike>();
|
||||
factory.registerFunction<FunctionNotLike>();
|
||||
factory.registerFunction<FunctionExtract>();
|
||||
|
||||
factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
@ -1,17 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <Functions/IFunction.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <common/StringRef.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Search and replace functions in strings:
|
||||
*
|
||||
* position(haystack, needle) - the normal search for a substring in a string, returns the position (in bytes) of the found substring starting with 1, or 0 if no substring is found.
|
||||
@ -35,12 +38,28 @@ namespace DB
|
||||
*
|
||||
* replaceRegexpOne(haystack, pattern, replacement) - replaces the pattern with the specified regexp, only the first occurrence.
|
||||
* replaceRegexpAll(haystack, pattern, replacement) - replaces the pattern with the specified regexp, all occurrences.
|
||||
*
|
||||
* multiPosition(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- find first occurrences (positions) of all the const patterns inside haystack
|
||||
* multiPositionUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
* multiPositionCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
* multiPositionCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
*
|
||||
* multiSearch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- find any of the const patterns inside haystack and return 0 or 1
|
||||
* multiSearchUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
* multiSearchCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
* multiSearchCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
|
||||
* firstMatch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- returns the first index of the matched string or zero if nothing was found
|
||||
* firstMatchUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
* firstMatchCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
* firstMatchCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
*/
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
@ -48,20 +67,11 @@ class FunctionsStringSearch : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionsStringSearch>();
|
||||
}
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringSearch>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
@ -90,7 +100,8 @@ public:
|
||||
{
|
||||
ResultType res{};
|
||||
Impl::constant_constant(col_haystack_const->getValue<String>(), col_needle_const->getValue<String>(), res);
|
||||
block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res));
|
||||
block.getByPosition(result).column
|
||||
= block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -103,20 +114,22 @@ public:
|
||||
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
|
||||
|
||||
if (col_haystack_vector && col_needle_vector)
|
||||
Impl::vector_vector(col_haystack_vector->getChars(),
|
||||
Impl::vector_vector(
|
||||
col_haystack_vector->getChars(),
|
||||
col_haystack_vector->getOffsets(),
|
||||
col_needle_vector->getChars(),
|
||||
col_needle_vector->getOffsets(),
|
||||
vec_res);
|
||||
else if (col_haystack_vector && col_needle_const)
|
||||
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue<String>(), vec_res);
|
||||
Impl::vector_constant(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue<String>(), vec_res);
|
||||
else if (col_haystack_const && col_needle_vector)
|
||||
Impl::constant_vector(col_haystack_const->getValue<String>(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
|
||||
Impl::constant_vector(
|
||||
col_haystack_const->getValue<String>(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
|
||||
else
|
||||
throw Exception("Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and "
|
||||
+ block.getByPosition(arguments[1]).column->getName()
|
||||
+ " of arguments of function "
|
||||
+ getName(),
|
||||
throw Exception(
|
||||
"Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and "
|
||||
+ block.getByPosition(arguments[1]).column->getName() + " of arguments of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
block.getByPosition(result).column = std::move(col_res);
|
||||
@ -129,20 +142,11 @@ class FunctionsStringSearchToString : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionsStringSearchToString>();
|
||||
}
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringSearchToString>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
@ -186,4 +190,156 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionsMultiStringPosition : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringPosition>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.size() + 1 >= std::numeric_limits<UInt8>::max())
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size())
|
||||
+ ", should be at most 255.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
|
||||
if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get()))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
|
||||
{
|
||||
using ResultType = typename Impl::ResultType;
|
||||
|
||||
const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column;
|
||||
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
|
||||
|
||||
const ColumnPtr & arr_ptr = block.getByPosition(arguments[1]).column;
|
||||
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
|
||||
|
||||
if (!col_const_arr)
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[1]).column->getName() + ". The array is not const",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
|
||||
std::vector<StringRef> refs;
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
const size_t column_haystack_size = column_haystack->size();
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create(column_haystack_size);
|
||||
|
||||
auto & vec_res = col_res->getData();
|
||||
auto & offsets_res = col_offsets->getData();
|
||||
|
||||
vec_res.resize(column_haystack_size * refs.size());
|
||||
|
||||
if (col_haystack_vector)
|
||||
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
|
||||
else
|
||||
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
size_t refs_size = refs.size();
|
||||
size_t accum = refs_size;
|
||||
|
||||
for (size_t i = 0; i < column_haystack_size; ++i, accum += refs_size)
|
||||
offsets_res[i] = accum;
|
||||
|
||||
block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionsMultiStringSearch : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringSearch>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.size() + 1 >= std::numeric_limits<UInt8>::max())
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size())
|
||||
+ ", should be at most 255.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
|
||||
if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get()))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
|
||||
return std::make_shared<DataTypeNumber<typename Impl::ResultType>>();
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
|
||||
{
|
||||
using ResultType = typename Impl::ResultType;
|
||||
|
||||
const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column;
|
||||
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
|
||||
|
||||
const ColumnPtr & arr_ptr = block.getByPosition(arguments[1]).column;
|
||||
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
|
||||
|
||||
if (!col_const_arr)
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[1]).column->getName() + ". The array is not const",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
|
||||
std::vector<StringRef> refs;
|
||||
refs.reserve(src_arr.size());
|
||||
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
const size_t column_haystack_size = column_haystack->size();
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
|
||||
auto & vec_res = col_res->getData();
|
||||
|
||||
vec_res.resize(column_haystack_size);
|
||||
|
||||
if (col_haystack_vector)
|
||||
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
|
||||
else
|
||||
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
block.getByPosition(result).column = std::move(col_res);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -446,13 +446,11 @@ void ActionsVisitor::visit(const ASTPtr & ast)
|
||||
|
||||
for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
|
||||
{
|
||||
ASTIdentifier * lambda_identifier = typeid_cast<ASTIdentifier *>(lambda_arg_asts[j].get());
|
||||
if (!lambda_identifier)
|
||||
auto opt_arg_name = getIdentifierName(lambda_arg_asts[j]);
|
||||
if (!opt_arg_name)
|
||||
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
String arg_name = lambda_identifier->name;
|
||||
|
||||
lambda_arguments.emplace_back(arg_name, lambda_type->getArgumentTypes()[j]);
|
||||
lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
|
||||
}
|
||||
|
||||
actions_stack.pushLevel(lambda_arguments);
|
||||
@ -541,9 +539,6 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
|
||||
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(arg.get());
|
||||
if (typeid_cast<const ASTSubquery *>(arg.get()) || identifier)
|
||||
{
|
||||
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
|
||||
String set_id = arg->getColumnName();
|
||||
|
||||
/// A special case is if the name of the table is specified on the right side of the IN statement,
|
||||
/// and the table has the type Set (a previously prepared set).
|
||||
if (identifier)
|
||||
@ -563,6 +558,9 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
|
||||
}
|
||||
}
|
||||
|
||||
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
|
||||
String set_id = arg->getColumnName();
|
||||
|
||||
SubqueryForSet & subquery_for_set = subqueries_for_sets[set_id];
|
||||
|
||||
/// If you already created a Set with the same subquery / table.
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/DumpASTNode.h>
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -90,19 +91,15 @@ private:
|
||||
void visit(ASTTableExpression & table_expression, ASTPtr &) const
|
||||
{
|
||||
if (table_expression.database_and_table_name)
|
||||
{
|
||||
tryVisit<ASTIdentifier>(table_expression.database_and_table_name);
|
||||
|
||||
if (table_expression.database_and_table_name->children.size() != 2)
|
||||
throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
else if (table_expression.subquery)
|
||||
tryVisit<ASTSubquery>(table_expression.subquery);
|
||||
}
|
||||
|
||||
/// @note It expects that only table (not column) identifiers are visited.
|
||||
void visit(const ASTIdentifier & identifier, ASTPtr & ast) const
|
||||
{
|
||||
if (ast->children.empty())
|
||||
if (identifier.name_parts.empty())
|
||||
ast = createTableIdentifier(database_name, identifier.name);
|
||||
}
|
||||
|
||||
|
@ -67,12 +67,13 @@ Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, cons
|
||||
}
|
||||
|
||||
|
||||
Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port)
|
||||
Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_)
|
||||
: user(user_), password(password_)
|
||||
{
|
||||
auto parsed_host_port = parseAddress(host_port_, clickhouse_port);
|
||||
host_name = parsed_host_port.first;
|
||||
port = parsed_host_port.second;
|
||||
secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
|
||||
|
||||
initially_resolved_address = DNSResolver::instance().resolveAddress(parsed_host_port.first, parsed_host_port.second);
|
||||
is_local = isLocal(*this, initially_resolved_address, clickhouse_port);
|
||||
@ -319,7 +320,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting
|
||||
|
||||
|
||||
Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String>> & names,
|
||||
const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote)
|
||||
const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote, bool secure)
|
||||
{
|
||||
UInt32 current_shard_num = 1;
|
||||
|
||||
@ -327,7 +328,7 @@ Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String
|
||||
{
|
||||
Addresses current;
|
||||
for (auto & replica : shard)
|
||||
current.emplace_back(replica, username, password, clickhouse_port);
|
||||
current.emplace_back(replica, username, password, clickhouse_port, secure);
|
||||
|
||||
addresses_with_failover.emplace_back(current);
|
||||
|
||||
|
@ -24,7 +24,7 @@ public:
|
||||
/// This parameter is needed only to check that some address is local (points to ourself).
|
||||
Cluster(const Settings & settings, const std::vector<std::vector<String>> & names,
|
||||
const String & username, const String & password,
|
||||
UInt16 clickhouse_port, bool treat_local_as_remote);
|
||||
UInt16 clickhouse_port, bool treat_local_as_remote, bool secure = false);
|
||||
|
||||
Cluster(const Cluster &) = delete;
|
||||
Cluster & operator=(const Cluster &) = delete;
|
||||
@ -69,7 +69,7 @@ public:
|
||||
|
||||
Address() = default;
|
||||
Address(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
|
||||
Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port);
|
||||
Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_ = false);
|
||||
|
||||
/// Returns 'escaped_host_name:port'
|
||||
String toString() const;
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
|
||||
/// Checks that ast is ASTIdentifier and removes num_qualifiers_to_strip components from the left.
|
||||
/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'.
|
||||
void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
|
||||
void stripIdentifier(const DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
|
||||
{
|
||||
ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(ast.get());
|
||||
|
||||
@ -22,29 +22,15 @@ void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
|
||||
|
||||
if (num_qualifiers_to_strip)
|
||||
{
|
||||
size_t num_components = identifier->children.size();
|
||||
|
||||
/// plain column
|
||||
if (num_components - num_qualifiers_to_strip == 1)
|
||||
{
|
||||
DB::String node_alias = identifier->tryGetAlias();
|
||||
ast = identifier->children.back();
|
||||
if (!node_alias.empty())
|
||||
ast->setAlias(node_alias);
|
||||
}
|
||||
else
|
||||
/// nested column
|
||||
{
|
||||
identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip);
|
||||
identifier->name_parts.erase(identifier->name_parts.begin(), identifier->name_parts.begin() + num_qualifiers_to_strip);
|
||||
DB::String new_name;
|
||||
for (const auto & child : identifier->children)
|
||||
for (const auto & part : identifier->name_parts)
|
||||
{
|
||||
if (!new_name.empty())
|
||||
new_name += '.';
|
||||
new_name += static_cast<const ASTIdentifier &>(*child.get()).name;
|
||||
}
|
||||
identifier->name = new_name;
|
||||
new_name += part;
|
||||
}
|
||||
identifier->name.swap(new_name);
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,32 +38,16 @@ void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
|
||||
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
|
||||
const DatabaseAndTableWithAlias & names)
|
||||
{
|
||||
size_t num_qualifiers_to_strip = 0;
|
||||
|
||||
/// It is compound identifier
|
||||
if (!identifier.children.empty())
|
||||
{
|
||||
size_t num_components = identifier.children.size();
|
||||
|
||||
/// database.table.column
|
||||
if (num_components >= 3
|
||||
&& !names.database.empty()
|
||||
&& *getIdentifierName(identifier.children[0]) == names.database
|
||||
&& *getIdentifierName(identifier.children[1]) == names.table)
|
||||
{
|
||||
num_qualifiers_to_strip = 2;
|
||||
}
|
||||
if (doesIdentifierBelongTo(identifier, names.database, names.table))
|
||||
return 2;
|
||||
|
||||
/// table.column or alias.column. If num_components > 2, it is like table.nested.column.
|
||||
if (num_components >= 2
|
||||
&& ((!names.table.empty() && *getIdentifierName(identifier.children[0]) == names.table)
|
||||
|| (!names.alias.empty() && *getIdentifierName(identifier.children[0]) == names.alias)))
|
||||
{
|
||||
num_qualifiers_to_strip = 1;
|
||||
}
|
||||
}
|
||||
/// table.column or alias.column.
|
||||
if (doesIdentifierBelongTo(identifier, names.table) ||
|
||||
doesIdentifierBelongTo(identifier, names.alias))
|
||||
return 1;
|
||||
|
||||
return num_qualifiers_to_strip;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -87,13 +57,13 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident
|
||||
table = identifier.name;
|
||||
alias = identifier.tryGetAlias();
|
||||
|
||||
if (!identifier.children.empty())
|
||||
if (!identifier.name_parts.empty())
|
||||
{
|
||||
if (identifier.children.size() != 2)
|
||||
throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR);
|
||||
if (identifier.name_parts.size() != 2)
|
||||
throw Exception("Logical error: 2 components expected in table expression '" + identifier.name + "'", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
getIdentifierName(identifier.children[0], database);
|
||||
getIdentifierName(identifier.children[1], table);
|
||||
database = identifier.name_parts[0];
|
||||
table = identifier.name_parts[1];
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,6 +88,22 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression &
|
||||
throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
bool DatabaseAndTableWithAlias::satisfies(const DatabaseAndTableWithAlias & db_table, bool table_may_be_an_alias)
|
||||
{
|
||||
/// table.*, alias.* or database.table.*
|
||||
|
||||
if (database.empty())
|
||||
{
|
||||
if (!db_table.table.empty() && table == db_table.table)
|
||||
return true;
|
||||
|
||||
if (!db_table.alias.empty())
|
||||
return (alias == db_table.alias) || (table_may_be_an_alias && table == db_table.alias);
|
||||
}
|
||||
|
||||
return database == db_table.database && table == db_table.table;
|
||||
}
|
||||
|
||||
String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const
|
||||
{
|
||||
if (alias.empty() && table.empty())
|
||||
@ -133,17 +119,7 @@ void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const
|
||||
String prefix = getQualifiedNamePrefix();
|
||||
identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end());
|
||||
|
||||
Names qualifiers;
|
||||
if (!alias.empty())
|
||||
qualifiers.push_back(alias);
|
||||
else
|
||||
{
|
||||
qualifiers.push_back(database);
|
||||
qualifiers.push_back(table);
|
||||
}
|
||||
|
||||
for (const auto & qualifier : qualifiers)
|
||||
identifier->children.emplace_back(std::make_shared<ASTIdentifier>(qualifier));
|
||||
addIdentifierQualifier(*identifier, database, table, alias);
|
||||
}
|
||||
}
|
||||
|
||||
@ -209,21 +185,13 @@ std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuer
|
||||
return DatabaseAndTableWithAlias(database_and_table_name);
|
||||
}
|
||||
|
||||
ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number)
|
||||
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number)
|
||||
{
|
||||
const ASTTableExpression * table_expression = getTableExpression(select, table_number);
|
||||
if (table_expression)
|
||||
if (const ASTTableExpression * table_expression = getTableExpression(select, table_number))
|
||||
{
|
||||
#if 1 /// TODO: It hides some logical error in InterpreterSelectQuery & distributed tables
|
||||
if (table_expression->database_and_table_name)
|
||||
{
|
||||
if (table_expression->database_and_table_name->children.empty())
|
||||
return table_expression->database_and_table_name;
|
||||
|
||||
if (table_expression->database_and_table_name->children.size() == 2)
|
||||
return table_expression->database_and_table_name->children[1];
|
||||
}
|
||||
#endif
|
||||
if (table_expression->table_function)
|
||||
return table_expression->table_function;
|
||||
|
||||
|
@ -2,8 +2,9 @@
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -33,9 +34,12 @@ struct DatabaseAndTableWithAlias
|
||||
|
||||
/// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to its name.
|
||||
void makeQualifiedName(const ASTPtr & ast) const;
|
||||
|
||||
/// Check if it satisfies another db_table name. @note operation is not symmetric.
|
||||
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
|
||||
};
|
||||
|
||||
void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
|
||||
void stripIdentifier(const DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
|
||||
|
||||
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
|
||||
const DatabaseAndTableWithAlias & names);
|
||||
@ -44,6 +48,6 @@ std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery
|
||||
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
|
||||
|
||||
std::vector<const ASTTableExpression *> getSelectTablesExpression(const ASTSelectQuery & select_query);
|
||||
ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number);
|
||||
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
|
||||
|
@ -310,7 +310,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block &
|
||||
|
||||
if (!prepared_sets.count(arg->range)) /// Not already prepared.
|
||||
{
|
||||
if (typeid_cast<ASTSubquery *>(arg.get()) || typeid_cast<ASTIdentifier *>(arg.get()))
|
||||
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))
|
||||
{
|
||||
if (settings.use_index_for_in_with_subqueries)
|
||||
tryMakeSetForIndexFromSubquery(arg);
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
ASTPtr table_name;
|
||||
ASTPtr subquery_or_table_name;
|
||||
|
||||
if (typeid_cast<const ASTIdentifier *>(subquery_or_table_name_or_table_expression.get()))
|
||||
if (isIdentifier(subquery_or_table_name_or_table_expression))
|
||||
{
|
||||
table_name = subquery_or_table_name_or_table_expression;
|
||||
subquery_or_table_name = table_name;
|
||||
@ -86,7 +86,7 @@ public:
|
||||
if (table_name)
|
||||
{
|
||||
/// If this is already an external table, you do not need to add anything. Just remember its presence.
|
||||
if (external_tables.end() != external_tables.find(static_cast<const ASTIdentifier &>(*table_name).name))
|
||||
if (external_tables.end() != external_tables.find(*getIdentifierName(table_name)))
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -86,20 +86,17 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
|
||||
String database_name;
|
||||
String table_name;
|
||||
|
||||
auto identifier = table_expression->database_and_table_name;
|
||||
if (identifier->children.size() > 2)
|
||||
auto identifier = typeid_cast<const ASTIdentifier *>(table_expression->database_and_table_name.get());
|
||||
if (identifier->name_parts.size() > 2)
|
||||
throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (identifier->children.size() > 1)
|
||||
if (identifier->name_parts.size() > 1)
|
||||
{
|
||||
auto database_ptr = identifier->children[0];
|
||||
auto table_ptr = identifier->children[1];
|
||||
|
||||
getIdentifierName(database_ptr, database_name);
|
||||
getIdentifierName(table_ptr, table_name);
|
||||
database_name = identifier->name_parts[0];
|
||||
table_name = identifier->name_parts[1];
|
||||
}
|
||||
else
|
||||
getIdentifierName(identifier, table_name);
|
||||
table_name = identifier->name;
|
||||
|
||||
table = context.getTable(database_name, table_name);
|
||||
}
|
||||
|
@ -147,14 +147,22 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
|
||||
max_streams = settings.max_threads;
|
||||
|
||||
ASTPtr table_expression = getTableFunctionOrSubquery(query, 0);
|
||||
ASTPtr table_expression = extractTableExpression(query, 0);
|
||||
|
||||
bool is_table_func = false;
|
||||
bool is_subquery = false;
|
||||
if (table_expression)
|
||||
{
|
||||
is_table_func = typeid_cast<const ASTFunction *>(table_expression.get());
|
||||
is_subquery = typeid_cast<const ASTSelectWithUnionQuery *>(table_expression.get());
|
||||
}
|
||||
|
||||
if (input)
|
||||
{
|
||||
/// Read from prepared input.
|
||||
source_header = input->getHeader();
|
||||
}
|
||||
else if (table_expression && typeid_cast<const ASTSelectWithUnionQuery *>(table_expression.get()))
|
||||
else if (is_subquery)
|
||||
{
|
||||
/// Read from subquery.
|
||||
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
|
||||
@ -164,7 +172,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
}
|
||||
else if (!storage)
|
||||
{
|
||||
if (table_expression && typeid_cast<const ASTFunction *>(table_expression.get()))
|
||||
if (is_table_func)
|
||||
{
|
||||
/// Read from table function.
|
||||
storage = context.getQueryContext().executeTableFunction(table_expression);
|
||||
@ -208,7 +216,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
if (query_analyzer->isRewriteSubqueriesPredicate())
|
||||
{
|
||||
/// Rebuild interpreter_subquery when PredicateOptimizer has rewritten subqueries and the main table is a subquery
|
||||
if (table_expression && typeid_cast<ASTSelectWithUnionQuery *>(table_expression.get()))
|
||||
if (is_subquery)
|
||||
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
|
||||
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1,
|
||||
only_analyze);
|
||||
@ -921,7 +929,7 @@ void InterpreterSelectQuery::executeFetchColumns(
|
||||
/// If we need fewer columns than the subquery has - update the interpreter.
|
||||
if (required_columns.size() < source_header.columns())
|
||||
{
|
||||
ASTPtr subquery = getTableFunctionOrSubquery(query, 0);
|
||||
ASTPtr subquery = extractTableExpression(query, 0);
|
||||
if (!subquery)
|
||||
throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
@ -1396,7 +1404,7 @@ bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
|
||||
* In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end.
|
||||
*/
|
||||
|
||||
if (auto query_table = getTableFunctionOrSubquery(query, 0))
|
||||
if (auto query_table = extractTableExpression(query, 0))
|
||||
{
|
||||
if (auto ast_union = typeid_cast<const ASTSelectWithUnionQuery *>(query_table.get()))
|
||||
{
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
|
||||
#include <Interpreters/SemanticSelectQuery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -19,6 +18,7 @@ namespace ErrorCodes
|
||||
extern const int TOO_DEEP_AST;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/// Attach additional semantic info to generated select.
|
||||
struct AppendSemanticVisitorData
|
||||
{
|
||||
@ -35,6 +35,7 @@ struct AppendSemanticVisitorData
|
||||
done = true;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
/// Replaces one table element with pair.
|
||||
struct RewriteTablesVisitorData
|
||||
@ -124,7 +125,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
|
||||
if (!left)
|
||||
return;
|
||||
|
||||
SemanticSelectQuery::hideNames(select, hidden_names, subquery_name);
|
||||
//SemanticSelectQuery::hideNames(select, hidden_names, subquery_name);
|
||||
}
|
||||
|
||||
select.tables = std::make_shared<ASTTablesInSelectQuery>();
|
||||
@ -135,11 +136,15 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
|
||||
data.done = true;
|
||||
}
|
||||
|
||||
ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTSelectQuery & select, ASTPtr ast_left, ASTPtr ast_right, const String & subquery_alias)
|
||||
ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTSelectQuery &, ASTPtr ast_left, ASTPtr ast_right, const String & subquery_alias)
|
||||
{
|
||||
#if 0
|
||||
using RewriteMatcher = LinkedMatcher<
|
||||
OneTypeMatcher<RewriteTablesVisitorData>,
|
||||
OneTypeMatcher<AppendSemanticVisitorData>>;
|
||||
#else
|
||||
using RewriteMatcher = OneTypeMatcher<RewriteTablesVisitorData>;
|
||||
#endif
|
||||
using RewriteVisitor = InDepthNodeVisitor<RewriteMatcher, true>;
|
||||
|
||||
auto left = typeid_cast<const ASTTablesInSelectQueryElement *>(ast_left.get());
|
||||
@ -160,8 +165,12 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTSelectQuery & select, ASTP
|
||||
if (!res)
|
||||
throw Exception("Cannot parse rewrite query", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
#if 0
|
||||
RewriteVisitor::Data visitor_data =
|
||||
std::make_pair<RewriteTablesVisitorData, AppendSemanticVisitorData>({ast_left, ast_right}, {select.semantic});
|
||||
#else
|
||||
RewriteVisitor::Data visitor_data{ast_left, ast_right};
|
||||
#endif
|
||||
RewriteVisitor(visitor_data).visit(res);
|
||||
return res;
|
||||
}
|
||||
|
@ -133,8 +133,12 @@ void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(cons
|
||||
{
|
||||
if (const auto identifier = typeid_cast<ASTIdentifier *>(expression.get()))
|
||||
{
|
||||
if (!identifier->children.empty())
|
||||
dependencies_and_qualified.emplace_back(std::pair(identifier, expression->getAliasOrColumnName()));
|
||||
String table_alias;
|
||||
if (!identifier->name_parts.empty())
|
||||
{
|
||||
if (!tables_with_aliases.empty())
|
||||
table_alias = tables_with_aliases[0].getQualifiedNamePrefix();
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t best_table_pos = 0;
|
||||
@ -153,9 +157,11 @@ void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(cons
|
||||
}
|
||||
}
|
||||
|
||||
String qualified_name = tables_with_aliases[best_table_pos].getQualifiedNamePrefix() + expression->getAliasOrColumnName();
|
||||
dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name));
|
||||
table_alias = tables_with_aliases[best_table_pos].getQualifiedNamePrefix();
|
||||
}
|
||||
|
||||
String qualified_name = table_alias + expression->getAliasOrColumnName();
|
||||
dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -356,31 +362,17 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que
|
||||
if (qualified_asterisk->children.size() != 1)
|
||||
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
ASTIdentifier * ident = typeid_cast<ASTIdentifier *>(qualified_asterisk->children[0].get());
|
||||
if (!ident)
|
||||
throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR);
|
||||
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
|
||||
|
||||
size_t num_components = ident->children.size();
|
||||
if (num_components > 2)
|
||||
throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
|
||||
|
||||
for (auto it = tables_expression.begin(); it != tables_expression.end(); ++it)
|
||||
for (auto it = tables_expression.begin(); it != tables_expression.end();)
|
||||
{
|
||||
const ASTTableExpression * table_expression = *it;
|
||||
DatabaseAndTableWithAlias database_and_table_with_alias(*table_expression, context.getCurrentDatabase());
|
||||
/// database.table.*
|
||||
if (num_components == 2 && !database_and_table_with_alias.database.empty()
|
||||
&& static_cast<const ASTIdentifier &>(*ident->children[0]).name == database_and_table_with_alias.database
|
||||
&& static_cast<const ASTIdentifier &>(*ident->children[1]).name == database_and_table_with_alias.table)
|
||||
continue;
|
||||
/// table.* or alias.*
|
||||
else if (num_components == 0
|
||||
&& ((!database_and_table_with_alias.table.empty() && ident->name == database_and_table_with_alias.table)
|
||||
|| (!database_and_table_with_alias.alias.empty() && ident->name == database_and_table_with_alias.alias)))
|
||||
continue;
|
||||
|
||||
if (ident_db_and_name.satisfies(database_and_table_with_alias, true))
|
||||
++it;
|
||||
else
|
||||
/// It's not a required table
|
||||
tables_expression.erase(it);
|
||||
it = tables_expression.erase(it); /// It's not a required table
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -168,18 +168,11 @@ void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & dat
|
||||
}
|
||||
else if (const auto * qualified_asterisk = typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
|
||||
{
|
||||
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(qualified_asterisk->children[0].get());
|
||||
size_t num_components = identifier->children.size();
|
||||
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
|
||||
|
||||
for (const auto & [table_name, table_columns] : tables_with_columns)
|
||||
{
|
||||
if ((num_components == 2 /// database.table.*
|
||||
&& !table_name.database.empty() /// This is normal (not a temporary) table.
|
||||
&& static_cast<const ASTIdentifier &>(*identifier->children[0]).name == table_name.database
|
||||
&& static_cast<const ASTIdentifier &>(*identifier->children[1]).name == table_name.table)
|
||||
|| (num_components == 0 /// t.*
|
||||
&& ((!table_name.table.empty() && identifier->name == table_name.table) /// table.*
|
||||
|| (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.*
|
||||
if (ident_db_and_name.satisfies(table_name, true))
|
||||
{
|
||||
for (const auto & column_name : table_columns)
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
|
@ -1,43 +0,0 @@
|
||||
#pragma once
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Additional information for ASTSelectQuery
|
||||
class SemanticSelectQuery : public ISemantic
|
||||
{
|
||||
public:
|
||||
SemanticPtr clone() const override { return std::make_shared<SemanticSelectQuery>(*this); }
|
||||
|
||||
std::vector<String> getPossibleNames(const String & name) const
|
||||
{
|
||||
std::vector<String> res;
|
||||
res.push_back(name);
|
||||
|
||||
for (auto it = hidings.find(name); it != hidings.end(); it = hidings.find(it->second))
|
||||
res.push_back(it->second);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void hideNames(ASTSelectQuery & select, const std::vector<String> & hidden, const String & new_name)
|
||||
{
|
||||
if (!select.semantic)
|
||||
select.semantic = std::make_shared<SemanticSelectQuery>();
|
||||
|
||||
auto & sema = static_cast<SemanticSelectQuery &>(*select.semantic);
|
||||
sema.hideNames(hidden, new_name);
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<String, String> hidings;
|
||||
|
||||
void hideNames(const std::vector<String> & hidden, const String & new_name)
|
||||
{
|
||||
for (auto & name : hidden)
|
||||
hidings.emplace(name, new_name);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -89,38 +89,17 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTIdentifier
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data)
|
||||
{
|
||||
const std::vector<DatabaseAndTableWithAlias> & tables = data.tables;
|
||||
|
||||
if (ast->children.size() != 1)
|
||||
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
ASTIdentifier * ident = typeid_cast<ASTIdentifier *>(ast->children[0].get());
|
||||
if (!ident)
|
||||
throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR);
|
||||
auto & ident = ast->children[0];
|
||||
|
||||
size_t num_components = ident->children.size();
|
||||
if (num_components > 2)
|
||||
throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
|
||||
/// @note it could contain table alias as table name.
|
||||
DatabaseAndTableWithAlias db_and_table(ident);
|
||||
|
||||
DatabaseAndTableWithAlias db_and_table(*ident);
|
||||
|
||||
for (const auto & table_names : tables)
|
||||
{
|
||||
/// database.table.*, table.* or alias.*
|
||||
if (num_components == 2)
|
||||
{
|
||||
if (!table_names.database.empty() &&
|
||||
db_and_table.database == table_names.database &&
|
||||
db_and_table.table == table_names.table)
|
||||
for (const auto & known_table : data.tables)
|
||||
if (db_and_table.satisfies(known_table, true))
|
||||
return {};
|
||||
}
|
||||
else if (num_components == 0)
|
||||
{
|
||||
if ((!table_names.table.empty() && db_and_table.table == table_names.table) ||
|
||||
(!table_names.alias.empty() && db_and_table.table == table_names.alias))
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <Core/Names.h>
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
|
@ -18,14 +18,14 @@ void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, Form
|
||||
|
||||
/// A simple or compound identifier?
|
||||
|
||||
if (children.size() > 1)
|
||||
if (name_parts.size() > 1)
|
||||
{
|
||||
for (size_t i = 0, size = children.size(); i < size; ++i)
|
||||
for (size_t i = 0, size = name_parts.size(); i < size; ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
settings.ostr << '.';
|
||||
|
||||
format_element(static_cast<const ASTIdentifier &>(*children[i].get()).name);
|
||||
format_element(name_parts[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -44,11 +44,7 @@ ASTPtr createTableIdentifier(const String & database_name, const String & table_
|
||||
if (database_name.empty())
|
||||
return ASTIdentifier::createSpecial(table_name);
|
||||
|
||||
ASTPtr database = ASTIdentifier::createSpecial(database_name);
|
||||
ASTPtr table = ASTIdentifier::createSpecial(table_name);
|
||||
|
||||
ASTPtr database_and_table = ASTIdentifier::createSpecial(database_name + "." + table_name);
|
||||
database_and_table->children = {database, table};
|
||||
ASTPtr database_and_table = ASTIdentifier::createSpecial(database_name + "." + table_name, {database_name, table_name});
|
||||
return database_and_table;
|
||||
}
|
||||
|
||||
@ -117,4 +113,35 @@ void setIdentifierSpecial(ASTPtr & ast)
|
||||
id->setSpecial();
|
||||
}
|
||||
|
||||
/// Appends qualifier components to `identifier.name_parts`.
/// A non-empty `alias` takes precedence and is the only component appended;
/// otherwise `database` (when non-empty) and then `table` are appended.
/// `identifier.name` is not modified here.
void addIdentifierQualifier(ASTIdentifier & identifier, const String & database, const String & table, const String & alias)
{
    auto & parts = identifier.name_parts;

    if (!alias.empty())
    {
        parts.emplace_back(alias);
        return;
    }

    if (!database.empty())
        parts.emplace_back(database);
    parts.emplace_back(table);
}
|
||||
|
||||
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table)
|
||||
{
|
||||
size_t num_components = identifier.name_parts.size();
|
||||
if (num_components >= 3)
|
||||
return identifier.name_parts[0] == database &&
|
||||
identifier.name_parts[1] == table;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table)
|
||||
{
|
||||
size_t num_components = identifier.name_parts.size();
|
||||
if (num_components >= 2)
|
||||
return identifier.name_parts[0] == table;
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -12,11 +12,14 @@ namespace DB
|
||||
class ASTIdentifier : public ASTWithAlias
|
||||
{
|
||||
public:
|
||||
/// name. The composite identifier here will have a concatenated name (of the form a.b.c), and individual components will be available inside the children.
|
||||
/// The composite identifier will have a concatenated name (of the form a.b.c),
|
||||
/// and individual components will be available inside the name_parts.
|
||||
String name;
|
||||
std::vector<String> name_parts;
|
||||
|
||||
ASTIdentifier(const String & name_)
|
||||
ASTIdentifier(const String & name_, std::vector<String> && name_parts_ = {})
|
||||
: name(name_)
|
||||
, name_parts(name_parts_)
|
||||
, special(false)
|
||||
{
|
||||
range = StringRange(name.data(), name.data() + name.size());
|
||||
@ -37,11 +40,13 @@ protected:
|
||||
void appendColumnNameImpl(WriteBuffer & ostr) const override;
|
||||
|
||||
private:
|
||||
using ASTWithAlias::children; /// ASTIdentifier is child free
|
||||
|
||||
bool special; /// TODO: it would be ptr to semantic here
|
||||
|
||||
static std::shared_ptr<ASTIdentifier> createSpecial(const String & name_)
|
||||
static std::shared_ptr<ASTIdentifier> createSpecial(const String & name, std::vector<String> && name_parts = {})
|
||||
{
|
||||
auto ret = std::make_shared<ASTIdentifier>(name_);
|
||||
auto ret = std::make_shared<ASTIdentifier>(name, std::move(name_parts));
|
||||
ret->special = true;
|
||||
return ret;
|
||||
}
|
||||
@ -77,5 +82,8 @@ std::optional<String> getTableIdentifierName(const ASTIdentifier & node);
|
||||
std::optional<String> getTableIdentifierName(const ASTPtr & ast);
|
||||
|
||||
void setIdentifierSpecial(ASTPtr & ast);
|
||||
void addIdentifierQualifier(ASTIdentifier & identifier, const String & database, const String & table, const String & alias);
|
||||
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table_or_alias);
|
||||
bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
|
||||
|
||||
}
|
||||
|
@ -51,8 +51,6 @@ ASTPtr ASTSelectQuery::clone() const
|
||||
|
||||
#undef CLONE
|
||||
|
||||
if (semantic)
|
||||
res->semantic = semantic->clone();
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -169,19 +169,19 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
|
||||
return false;
|
||||
|
||||
String name;
|
||||
std::vector<String> parts;
|
||||
const ASTExpressionList & list = static_cast<const ASTExpressionList &>(*id_list.get());
|
||||
for (const auto & child : list.children)
|
||||
{
|
||||
if (!name.empty())
|
||||
name += '.';
|
||||
name += *getIdentifierName(child);
|
||||
parts.emplace_back(*getIdentifierName(child));
|
||||
name += parts.back();
|
||||
}
|
||||
|
||||
node = std::make_shared<ASTIdentifier>(name);
|
||||
|
||||
/// In `children`, remember the identifiers-components, if there are more than one.
|
||||
if (list.children.size() > 1)
|
||||
node->children.insert(node->children.end(), list.children.begin(), list.children.end());
|
||||
if (parts.size() == 1)
|
||||
parts.clear();
|
||||
node = std::make_shared<ASTIdentifier>(name, std::move(parts));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -31,20 +31,6 @@ class IAST;
|
||||
using ASTPtr = std::shared_ptr<IAST>;
|
||||
using ASTs = std::vector<ASTPtr>;
|
||||
|
||||
class ISemantic;
|
||||
using SemanticPtr = std::shared_ptr<ISemantic>;
|
||||
|
||||
/// Interface to set additional information to IAST. Derived classes should be named according to their AST nodes' types:
|
||||
/// ASTIdentifier => SemanticIdentifier, ASTSome => SemanticSome, ...
|
||||
class ISemantic
|
||||
{
|
||||
public:
|
||||
virtual ~ISemantic() = default;
|
||||
ISemantic() = default;
|
||||
ISemantic(const ISemantic &) = default;
|
||||
virtual SemanticPtr clone() const = 0;
|
||||
};
|
||||
|
||||
class WriteBuffer;
|
||||
|
||||
|
||||
@ -58,7 +44,6 @@ public:
|
||||
|
||||
/// This pointer does not allow it to be deleted while the range refers to it.
|
||||
StringPtr owned_string;
|
||||
SemanticPtr semantic;
|
||||
|
||||
virtual ~IAST() = default;
|
||||
IAST() = default;
|
||||
|
@ -219,6 +219,8 @@ public:
|
||||
/// If commit() was not called, deletes temporary files, canceling the ALTER.
|
||||
~AlterDataPartTransaction();
|
||||
|
||||
const String & getPartName() const { return data_part->name; }
|
||||
|
||||
/// Review the changes before the commit.
|
||||
const NamesAndTypesList & getNewColumns() const { return new_columns; }
|
||||
const DataPart::Checksums & getNewChecksums() const { return new_checksums; }
|
||||
|
@ -312,7 +312,11 @@ String MergeTreeDataPartChecksums::getTotalChecksumHex() const
|
||||
void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const
|
||||
{
|
||||
writeString("checksums format version: 5\n", to);
|
||||
serializeWithoutHeader(to);
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::serializeWithoutHeader(WriteBuffer & to) const
|
||||
{
|
||||
writeVarUInt(num_compressed_files, to);
|
||||
writeVarUInt(num_uncompressed_files, to);
|
||||
|
||||
@ -337,26 +341,31 @@ bool MinimalisticDataPartChecksums::deserialize(ReadBuffer & in)
|
||||
|
||||
if (format_version < MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS)
|
||||
{
|
||||
auto full_checksums_ptr = std::make_unique<MergeTreeDataPartChecksums>();
|
||||
if (!full_checksums_ptr->read(in, format_version))
|
||||
MergeTreeDataPartChecksums new_full_checksums;
|
||||
if (!new_full_checksums.read(in, format_version))
|
||||
return false;
|
||||
|
||||
computeTotalChecksums(*full_checksums_ptr);
|
||||
full_checksums = std::move(full_checksums_ptr);
|
||||
computeTotalChecksums(new_full_checksums);
|
||||
full_checksums = std::move(new_full_checksums);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (format_version > MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS)
|
||||
throw Exception("Unknown checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT);
|
||||
|
||||
deserializeWithoutHeader(in);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::deserializeWithoutHeader(ReadBuffer & in)
|
||||
{
|
||||
readVarUInt(num_compressed_files, in);
|
||||
readVarUInt(num_uncompressed_files, in);
|
||||
|
||||
readPODBinary(hash_of_all_files, in);
|
||||
readPODBinary(hash_of_uncompressed_files, in);
|
||||
readPODBinary(uncompressed_hash_of_compressed_files, in);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums_)
|
||||
@ -410,7 +419,7 @@ String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPar
|
||||
return checksums.getSerializedString();
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files)
|
||||
void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
|
||||
{
|
||||
if (full_checksums && rhs.full_checksums)
|
||||
full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files);
|
||||
@ -419,7 +428,7 @@ void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksu
|
||||
checkEqualImpl(rhs, check_uncompressed_hash_in_compressed_files);
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files)
|
||||
void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
|
||||
{
|
||||
if (full_checksums)
|
||||
full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files);
|
||||
@ -430,7 +439,7 @@ void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums
|
||||
checkEqualImpl(rhs_minimalistic, check_uncompressed_hash_in_compressed_files);
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files)
|
||||
void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
|
||||
{
|
||||
if (num_compressed_files != rhs.num_compressed_files || num_uncompressed_files != rhs.num_uncompressed_files)
|
||||
{
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <city.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
|
||||
class SipHash;
|
||||
@ -112,7 +112,7 @@ struct MinimalisticDataPartChecksums
|
||||
}
|
||||
|
||||
/// Is set only for old formats
|
||||
std::unique_ptr<MergeTreeDataPartChecksums> full_checksums;
|
||||
std::optional<MergeTreeDataPartChecksums> full_checksums;
|
||||
|
||||
static constexpr size_t MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS = 5;
|
||||
|
||||
@ -120,15 +120,17 @@ struct MinimalisticDataPartChecksums
|
||||
void computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums);
|
||||
|
||||
bool deserialize(ReadBuffer & in);
|
||||
void deserializeWithoutHeader(ReadBuffer & in);
|
||||
static MinimalisticDataPartChecksums deserializeFrom(const String & s);
|
||||
|
||||
void serialize(WriteBuffer & to) const;
|
||||
void serializeWithoutHeader(WriteBuffer & to) const;
|
||||
String getSerializedString();
|
||||
static String getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic);
|
||||
|
||||
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files);
|
||||
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files);
|
||||
void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files);
|
||||
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
|
||||
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
|
||||
void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -148,6 +148,13 @@ struct MergeTreeSettings
|
||||
*/ \
|
||||
M(SettingBool, use_minimalistic_checksums_in_zookeeper, true) \
|
||||
\
|
||||
/** Store part header (checksums and columns) in a compact format and a single part znode \
|
||||
* instead of separate znodes (<part>/columns and <part>/checksums). \
|
||||
* This can dramatically reduce snapshot size in ZooKeeper. \
|
||||
* Before enabling, check that all replicas support the new format. \
|
||||
*/ \
|
||||
M(SettingBool, use_minimalistic_part_header_in_zookeeper, false) \
|
||||
\
|
||||
/** How many records about mutations that are done to keep. \
|
||||
* If zero, then keep all of them */ \
|
||||
M(SettingUInt64, finished_mutations_to_keep, 100) \
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeAlterThread.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Common/setThreadName.h>
|
||||
@ -155,32 +156,9 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
if (!transaction)
|
||||
continue;
|
||||
|
||||
storage.updatePartHeaderInZooKeeperAndCommit(zookeeper, *transaction);
|
||||
|
||||
++changed_parts;
|
||||
|
||||
/// Update part metadata in ZooKeeper.
|
||||
Coordination::Requests ops;
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1));
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
storage.replica_path + "/parts/" + part->name + "/checksums",
|
||||
storage.getChecksumsForZooKeeper(transaction->getNewChecksums()),
|
||||
-1));
|
||||
|
||||
try
|
||||
{
|
||||
zookeeper->multi(ops);
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
/// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally.
|
||||
if (e.code == Coordination::ZNONODE)
|
||||
storage.enqueuePartForCheck(part->name);
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
/// Apply file changes.
|
||||
transaction->commit();
|
||||
}
|
||||
|
||||
/// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h>
|
||||
#include <Storages/MergeTree/checkDataPart.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Common/setThreadName.h>
|
||||
|
||||
@ -204,22 +205,34 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
auto zookeeper = storage.getZooKeeper();
|
||||
auto table_lock = storage.lockStructure(false);
|
||||
|
||||
auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
|
||||
part->columns, part->checksums);
|
||||
|
||||
String part_path = storage.replica_path + "/parts/" + part_name;
|
||||
String part_znode;
|
||||
/// If the part is in ZooKeeper, check its data against its checksums, and the checksums against ZooKeeper.
|
||||
if (zookeeper->exists(storage.replica_path + "/parts/" + part_name))
|
||||
if (zookeeper->tryGet(part_path, part_znode))
|
||||
{
|
||||
LOG_WARNING(log, "Checking data of part " << part_name << ".");
|
||||
|
||||
try
|
||||
{
|
||||
auto zk_checksums = MinimalisticDataPartChecksums::deserializeFrom(
|
||||
zookeeper->get(storage.replica_path + "/parts/" + part_name + "/checksums"));
|
||||
zk_checksums.checkEqual(part->checksums, true);
|
||||
ReplicatedMergeTreePartHeader zk_part_header;
|
||||
if (!part_znode.empty())
|
||||
zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode);
|
||||
else
|
||||
{
|
||||
String columns_znode = zookeeper->get(part_path + "/columns");
|
||||
String checksums_znode = zookeeper->get(part_path + "/checksums");
|
||||
zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
|
||||
columns_znode, checksums_znode);
|
||||
}
|
||||
|
||||
auto zk_columns = NamesAndTypesList::parse(
|
||||
zookeeper->get(storage.replica_path + "/parts/" + part_name + "/columns"));
|
||||
if (part->columns != zk_columns)
|
||||
if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash())
|
||||
throw Exception("Columns of local part " + part_name + " are different from ZooKeeper", ErrorCodes::TABLE_DIFFERS_TOO_MUCH);
|
||||
|
||||
zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
|
||||
|
||||
checkDataPart(
|
||||
storage.data.getFullPath() + part_name,
|
||||
storage.data.index_granularity,
|
||||
|
@ -0,0 +1,66 @@
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static std::array<char, 16> getSipHash(const String & str)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(str.data(), str.size());
|
||||
std::array<char, 16> result;
|
||||
hash.get128(result.data());
|
||||
return result;
|
||||
}
|
||||
|
||||
ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
|
||||
const String & columns_znode, const String & checksums_znode)
|
||||
{
|
||||
auto columns_hash = getSipHash(columns_znode);
|
||||
auto checksums = MinimalisticDataPartChecksums::deserializeFrom(checksums_znode);
|
||||
return ReplicatedMergeTreePartHeader(std::move(columns_hash), std::move(checksums));
|
||||
}
|
||||
|
||||
/// Builds a part header from in-memory part metadata: the full checksums are reduced
/// to total checksums and the textual form of the column list is hashed.
ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
    const NamesAndTypesList & columns,
    const MergeTreeDataPartChecksums & full_checksums)
{
    MinimalisticDataPartChecksums total_checksums;
    total_checksums.computeTotalChecksums(full_checksums);

    std::array<char, 16> hash_of_columns = getSipHash(columns.toString());

    return ReplicatedMergeTreePartHeader(hash_of_columns, std::move(total_checksums));
}
|
||||
|
||||
/// Reads the header from `in` in the layout produced by write():
/// a format version line, the raw bytes of the columns hash, then the checksums without their own header.
void ReplicatedMergeTreePartHeader::read(ReadBuffer & in)
|
||||
{
|
||||
/// Consumes the version line; presumably fails if the input does not start with it
/// (NOTE(review): confirm against the operator>> overload for string literals in IO/Operators.h).
in >> "part header format version: 1\n";
|
||||
/// The hash is stored as raw bytes, so exactly columns_hash.size() bytes are read back.
in.readStrict(columns_hash.data(), columns_hash.size());
|
||||
checksums.deserializeWithoutHeader(in);
|
||||
}
|
||||
|
||||
/// Parses a header from its serialized form (the format accepted by read()).
ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromString(const String & str)
{
    ReplicatedMergeTreePartHeader header;

    ReadBufferFromString buffer(str);
    header.read(buffer);

    return header;
}
|
||||
|
||||
/// Writes the header to `out`: a format version line, the raw bytes of the columns hash,
/// then the checksums without their own header. read() consumes the same layout.
void ReplicatedMergeTreePartHeader::write(WriteBuffer & out) const
|
||||
{
|
||||
writeString("part header format version: 1\n", out);
|
||||
/// The hash is written as raw bytes, not as text.
out.write(columns_hash.data(), columns_hash.size());
|
||||
checksums.serializeWithoutHeader(out);
|
||||
}
|
||||
|
||||
/// Returns the serialized header (the output of write()) as a string.
String ReplicatedMergeTreePartHeader::toString() const
{
    WriteBufferFromOwnString serialized;
    write(serialized);
    return serialized.str();
}
|
||||
|
||||
}
|
50
dbms/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.h
Normal file
50
dbms/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.h
Normal file
@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
|
||||
#include <Core/Types.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <array>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class NamesAndTypesList;
|
||||
|
||||
/// This class provides a compact representation of part metadata (available columns and checksums)
|
||||
/// that is intended to be stored in the part znode in ZooKeeper.
|
||||
/// It can also be initialized from the legacy format (from the contents of separate <part>/columns
|
||||
/// and <part>/checksums znodes).
|
||||
class ReplicatedMergeTreePartHeader
|
||||
{
|
||||
public:
|
||||
ReplicatedMergeTreePartHeader() = default;
|
||||
|
||||
static ReplicatedMergeTreePartHeader fromColumnsAndChecksumsZNodes(
|
||||
const String & columns_znode, const String & checksums_znode);
|
||||
|
||||
static ReplicatedMergeTreePartHeader fromColumnsAndChecksums(
|
||||
const NamesAndTypesList & columns, const MergeTreeDataPartChecksums & full_checksums);
|
||||
|
||||
void read(ReadBuffer & in);
|
||||
static ReplicatedMergeTreePartHeader fromString(const String & str);
|
||||
|
||||
void write(WriteBuffer & out) const;
|
||||
String toString() const;
|
||||
|
||||
const std::array<char, 16> & getColumnsHash() const { return columns_hash; }
|
||||
const MinimalisticDataPartChecksums & getChecksums() const { return checksums; }
|
||||
|
||||
private:
|
||||
ReplicatedMergeTreePartHeader(std::array<char, 16> columns_hash_, MinimalisticDataPartChecksums checksums_)
|
||||
: columns_hash(std::move(columns_hash_)), checksums(std::move(checksums_))
|
||||
{
|
||||
}
|
||||
|
||||
std::array<char, 16> columns_hash;
|
||||
MinimalisticDataPartChecksums checksums;
|
||||
};
|
||||
|
||||
}
|
@ -30,7 +30,7 @@ namespace ErrorCodes
|
||||
static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name)
|
||||
{
|
||||
auto db_and_table = getDatabaseAndTable(query, 0);
|
||||
ASTPtr subquery = getTableFunctionOrSubquery(query, 0);
|
||||
ASTPtr subquery = extractTableExpression(query, 0);
|
||||
|
||||
if (!db_and_table && !subquery)
|
||||
return;
|
||||
@ -69,7 +69,7 @@ static void checkAllowedQueries(const ASTSelectQuery & query)
|
||||
if (query.prewhere_expression || query.final() || query.sample_size())
|
||||
throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
|
||||
|
||||
ASTPtr subquery = getTableFunctionOrSubquery(query, 0);
|
||||
ASTPtr subquery = extractTableExpression(query, 0);
|
||||
if (!subquery)
|
||||
return;
|
||||
|
||||
|
@ -8,13 +8,14 @@
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MergeList.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h>
|
||||
#include <Storages/MergeTree/MergeList.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeAddress.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumAddedParts.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
||||
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
@ -566,11 +567,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_ERROR(log, "Fetching missing part " << missing_name);
|
||||
parts_to_fetch.push_back(missing_name);
|
||||
}
|
||||
}
|
||||
|
||||
for (const String & name : parts_to_fetch)
|
||||
expected_parts.erase(name);
|
||||
@ -671,25 +669,49 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
|
||||
removePartsFromZooKeeper(zookeeper, Strings(expected_parts.begin(), expected_parts.end()));
|
||||
}
|
||||
|
||||
/// Add to the queue job to pick up the missing parts from other replicas and remove from ZK the information that we have them.
|
||||
for (const String & name : parts_to_fetch)
|
||||
/// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them.
|
||||
|
||||
std::vector<std::future<Coordination::ExistsResponse>> exists_futures;
|
||||
exists_futures.reserve(parts_to_fetch.size());
|
||||
for (const String & part_name : parts_to_fetch)
|
||||
{
|
||||
LOG_ERROR(log, "Removing missing part from ZooKeeper and queueing a fetch: " << name);
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
exists_futures.emplace_back(zookeeper->asyncExists(part_path));
|
||||
}
|
||||
|
||||
std::vector<std::future<Coordination::MultiResponse>> enqueue_futures;
|
||||
enqueue_futures.reserve(parts_to_fetch.size());
|
||||
for (size_t i = 0; i < parts_to_fetch.size(); ++i)
|
||||
{
|
||||
const String & part_name = parts_to_fetch[i];
|
||||
LOG_ERROR(log, "Removing locally missing part from ZooKeeper and queueing a fetch: " << part_name);
|
||||
|
||||
Coordination::Requests ops;
|
||||
|
||||
time_t part_create_time = 0;
|
||||
Coordination::ExistsResponse exists_resp = exists_futures[i].get();
|
||||
if (!exists_resp.error)
|
||||
{
|
||||
part_create_time = exists_resp.stat.ctime / 1000;
|
||||
removePartFromZooKeeper(part_name, ops, exists_resp.stat.numChildren > 0);
|
||||
}
|
||||
|
||||
LogEntry log_entry;
|
||||
log_entry.type = LogEntry::GET_PART;
|
||||
log_entry.source_replica = "";
|
||||
log_entry.new_part_name = name;
|
||||
log_entry.create_time = tryGetPartCreateTime(zookeeper, replica_path, name);
|
||||
log_entry.new_part_name = part_name;
|
||||
log_entry.create_time = part_create_time;
|
||||
|
||||
/// We assume that this occurs before the queue is loaded (queue.initialize).
|
||||
Coordination::Requests ops;
|
||||
removePartFromZooKeeper(name, ops);
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
replica_path + "/queue/queue-", log_entry.toString(), zkutil::CreateMode::PersistentSequential));
|
||||
zookeeper->multi(ops);
|
||||
|
||||
enqueue_futures.emplace_back(zookeeper->asyncMulti(ops));
|
||||
}
|
||||
|
||||
for (auto & future : enqueue_futures)
|
||||
future.get();
|
||||
|
||||
/// Remove extra local parts.
|
||||
for (const MergeTreeData::DataPartPtr & part : unexpected_parts)
|
||||
{
|
||||
@ -708,18 +730,19 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
|
||||
check(part->columns);
|
||||
int expected_columns_version = columns_version;
|
||||
|
||||
auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
|
||||
part->columns, part->checksums);
|
||||
|
||||
Strings replicas = zookeeper->getChildren(zookeeper_path + "/replicas");
|
||||
std::shuffle(replicas.begin(), replicas.end(), rng);
|
||||
String expected_columns_str = part->columns.toString();
|
||||
bool has_been_alredy_added = false;
|
||||
bool has_been_already_added = false;
|
||||
|
||||
for (const String & replica : replicas)
|
||||
{
|
||||
Coordination::Stat stat_before, stat_after;
|
||||
String current_part_path = zookeeper_path + "/replicas/" + replica + "/parts/" + part_name;
|
||||
|
||||
String columns_str;
|
||||
if (!zookeeper->tryGet(current_part_path + "/columns", columns_str, &stat_before))
|
||||
String part_zk_str;
|
||||
if (!zookeeper->tryGet(current_part_path, part_zk_str))
|
||||
{
|
||||
if (absent_replicas_paths)
|
||||
absent_replicas_paths->emplace(current_part_path);
|
||||
@ -727,30 +750,41 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (columns_str != expected_columns_str)
|
||||
ReplicatedMergeTreePartHeader replica_part_header;
|
||||
if (!part_zk_str.empty())
|
||||
replica_part_header = ReplicatedMergeTreePartHeader::fromString(part_zk_str);
|
||||
else
|
||||
{
|
||||
LOG_INFO(log, "Not checking checksums of part " << part_name << " with replica " << replica
|
||||
<< " because columns are different");
|
||||
continue;
|
||||
}
|
||||
|
||||
Coordination::Stat columns_stat_before, columns_stat_after;
|
||||
String columns_str;
|
||||
String checksums_str;
|
||||
/// Let's check that the node's version with the columns did not change while we were reading the checksums.
|
||||
/// This ensures that the columns and the checksum refer to the same data.
|
||||
if (!zookeeper->tryGet(current_part_path + "/checksums", checksums_str) ||
|
||||
!zookeeper->exists(current_part_path + "/columns", &stat_after) ||
|
||||
stat_before.version != stat_after.version)
|
||||
if (!zookeeper->tryGet(current_part_path + "/columns", columns_str, &columns_stat_before) ||
|
||||
!zookeeper->tryGet(current_part_path + "/checksums", checksums_str) ||
|
||||
!zookeeper->exists(current_part_path + "/columns", &columns_stat_after) ||
|
||||
columns_stat_before.version != columns_stat_after.version)
|
||||
{
|
||||
LOG_INFO(log, "Not checking checksums of part " << part_name << " with replica " << replica
|
||||
<< " because part changed while we were reading its checksums");
|
||||
continue;
|
||||
}
|
||||
|
||||
auto zk_checksums = MinimalisticDataPartChecksums::deserializeFrom(checksums_str);
|
||||
zk_checksums.checkEqual(part->checksums, true);
|
||||
replica_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
|
||||
columns_str, checksums_str);
|
||||
}
|
||||
|
||||
if (replica_part_header.getColumnsHash() != local_part_header.getColumnsHash())
|
||||
{
|
||||
LOG_INFO(log, "Not checking checksums of part " << part_name << " with replica " << replica
|
||||
<< " because columns are different");
|
||||
continue;
|
||||
}
|
||||
|
||||
replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
|
||||
|
||||
if (replica == replica_name)
|
||||
has_been_alredy_added = true;
|
||||
has_been_already_added = true;
|
||||
|
||||
/// If we verify checksums in "sequential manner" (i.e. recheck absence of checksums on other replicas when commit)
|
||||
/// then it is enough to verify checksums on at least one replica since checksums on other replicas must be the same.
|
||||
@ -761,12 +795,20 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_been_alredy_added)
|
||||
if (!has_been_already_added)
|
||||
{
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
|
||||
ops.emplace_back(zkutil::makeCheckRequest(
|
||||
zookeeper_path + "/columns", expected_columns_version));
|
||||
|
||||
if (data.settings.use_minimalistic_part_header_in_zookeeper)
|
||||
{
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
part_path, local_part_header.toString(), zkutil::CreateMode::Persistent));
|
||||
}
|
||||
else
|
||||
{
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
part_path, "", zkutil::CreateMode::Persistent));
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
@ -774,6 +816,7 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
part_path + "/checksums", getChecksumsForZooKeeper(part->checksums), zkutil::CreateMode::Persistent));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(log, "checkPartAndAddToZooKeeper: node " << replica_path + "/parts/" + part_name << " already exists."
|
||||
@ -1510,16 +1553,8 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
|
||||
if (!transaction)
|
||||
continue;
|
||||
|
||||
/// Update part metadata in ZooKeeper.
|
||||
Coordination::Requests ops;
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1));
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
replica_path + "/parts/" + part->name + "/checksums", getChecksumsForZooKeeper(transaction->getNewChecksums()), -1));
|
||||
updatePartHeaderInZooKeeperAndCommit(zookeeper, *transaction);
|
||||
|
||||
zookeeper->multi(ops);
|
||||
|
||||
transaction->commit();
|
||||
++modified_parts;
|
||||
}
|
||||
|
||||
@ -2322,12 +2357,15 @@ bool StorageReplicatedMergeTree::createLogEntryToMutatePart(const MergeTreeDataP
|
||||
}
|
||||
|
||||
|
||||
void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops)
|
||||
void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children)
|
||||
{
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
|
||||
if (has_children)
|
||||
{
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/checksums", -1));
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/columns", -1));
|
||||
}
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
|
||||
}
|
||||
|
||||
@ -2338,19 +2376,26 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
|
||||
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
|
||||
Coordination::Requests ops;
|
||||
|
||||
time_t part_create_time = 0;
|
||||
Coordination::Stat stat;
|
||||
if (zookeeper->exists(part_path, &stat))
|
||||
{
|
||||
part_create_time = stat.ctime / 1000;
|
||||
removePartFromZooKeeper(part_name, ops, stat.numChildren > 0);
|
||||
}
|
||||
|
||||
LogEntryPtr log_entry = std::make_shared<LogEntry>();
|
||||
log_entry->type = LogEntry::GET_PART;
|
||||
log_entry->create_time = tryGetPartCreateTime(zookeeper, replica_path, part_name);
|
||||
log_entry->create_time = part_create_time;
|
||||
log_entry->source_replica = "";
|
||||
log_entry->new_part_name = part_name;
|
||||
|
||||
Coordination::Requests ops;
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
replica_path + "/queue/queue-", log_entry->toString(),
|
||||
zkutil::CreateMode::PersistentSequential));
|
||||
|
||||
removePartFromZooKeeper(part_name, ops);
|
||||
|
||||
auto results = zookeeper->multi(ops);
|
||||
|
||||
String path_created = dynamic_cast<const Coordination::CreateResponse &>(*results[0]).path_created;
|
||||
@ -2691,8 +2736,18 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin
|
||||
MinimalisticDataPartChecksums source_part_checksums;
|
||||
source_part_checksums.computeTotalChecksums(source_part->checksums);
|
||||
|
||||
String desired_checksums_str = getZooKeeper()->get(source_replica_path + "/parts/" + part_name + "/checksums");
|
||||
auto desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str);
|
||||
MinimalisticDataPartChecksums desired_checksums;
|
||||
auto zookeeper = getZooKeeper();
|
||||
String part_path = source_replica_path + "/parts/" + part_name;
|
||||
String part_znode = zookeeper->get(part_path);
|
||||
if (!part_znode.empty())
|
||||
desired_checksums = ReplicatedMergeTreePartHeader::fromString(part_znode).getChecksums();
|
||||
else
|
||||
{
|
||||
String desired_checksums_str = zookeeper->get(part_path + "/checksums");
|
||||
desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str);
|
||||
}
|
||||
|
||||
if (source_part_checksums == desired_checksums)
|
||||
{
|
||||
LOG_TRACE(log, "Found local part " << source_part->name << " with the same checksums as " << part_name);
|
||||
@ -4450,32 +4505,40 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(MergeTre
|
||||
|
||||
bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries)
|
||||
{
|
||||
using MultiFuture = std::future<Coordination::MultiResponse>;
|
||||
|
||||
size_t num_tries = 0;
|
||||
bool sucess = false;
|
||||
bool success = false;
|
||||
|
||||
while (!sucess && (max_retries == 0 || num_tries < max_retries))
|
||||
while (!success && (max_retries == 0 || num_tries < max_retries))
|
||||
{
|
||||
std::vector<MultiFuture> futures;
|
||||
futures.reserve(part_names.size());
|
||||
|
||||
++num_tries;
|
||||
sucess = true;
|
||||
|
||||
try
|
||||
{
|
||||
++num_tries;
|
||||
success = true;
|
||||
|
||||
auto zookeeper = getZooKeeper();
|
||||
|
||||
std::vector<std::future<Coordination::ExistsResponse>> exists_futures;
|
||||
exists_futures.reserve(part_names.size());
|
||||
for (const String & part_name : part_names)
|
||||
{
|
||||
Coordination::Requests ops;
|
||||
removePartFromZooKeeper(part_name, ops);
|
||||
|
||||
futures.emplace_back(zookeeper->tryAsyncMulti(ops));
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
exists_futures.emplace_back(zookeeper->asyncExists(part_path));
|
||||
}
|
||||
|
||||
for (auto & future : futures)
|
||||
std::vector<std::future<Coordination::MultiResponse>> remove_futures;
|
||||
remove_futures.reserve(part_names.size());
|
||||
for (size_t i = 0; i < part_names.size(); ++i)
|
||||
{
|
||||
Coordination::ExistsResponse exists_resp = exists_futures[i].get();
|
||||
if (!exists_resp.error)
|
||||
{
|
||||
Coordination::Requests ops;
|
||||
removePartFromZooKeeper(part_names[i], ops, exists_resp.stat.numChildren > 0);
|
||||
remove_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & future : remove_futures)
|
||||
{
|
||||
auto response = future.get();
|
||||
|
||||
@ -4484,7 +4547,7 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St
|
||||
|
||||
if (Coordination::isHardwareError(response.error))
|
||||
{
|
||||
sucess = false;
|
||||
success = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -4493,7 +4556,7 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St
|
||||
}
|
||||
catch (Coordination::Exception & e)
|
||||
{
|
||||
sucess = false;
|
||||
success = false;
|
||||
|
||||
if (Coordination::isHardwareError(e.code))
|
||||
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
||||
@ -4501,69 +4564,78 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St
|
||||
throw;
|
||||
}
|
||||
|
||||
if (!sucess && num_tries < max_retries)
|
||||
if (!success && num_tries < max_retries)
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
}
|
||||
|
||||
return sucess;
|
||||
return success;
|
||||
}
|
||||
|
||||
/// TODO: rewrite this code using async Multi ops after final ZooKeeper library update
|
||||
void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
|
||||
NameSet * parts_should_be_retried)
|
||||
void StorageReplicatedMergeTree::removePartsFromZooKeeper(
|
||||
zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, NameSet * parts_should_be_retried)
|
||||
{
|
||||
std::vector<std::future<Coordination::ExistsResponse>> exists_futures;
|
||||
exists_futures.reserve(part_names.size());
|
||||
for (const String & part_name : part_names)
|
||||
{
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
exists_futures.emplace_back(zookeeper->asyncExists(part_path));
|
||||
}
|
||||
|
||||
std::vector<std::future<Coordination::MultiResponse>> remove_futures;
|
||||
remove_futures.reserve(part_names.size());
|
||||
try
|
||||
{
|
||||
for (size_t i = 0; i < part_names.size(); ++i)
|
||||
{
|
||||
Coordination::ExistsResponse exists_resp = exists_futures[i].get();
|
||||
if (!exists_resp.error)
|
||||
{
|
||||
Coordination::Requests ops;
|
||||
auto it_first_node_in_batch = part_names.cbegin();
|
||||
|
||||
for (auto it = part_names.cbegin(); it != part_names.cend(); ++it)
|
||||
{
|
||||
removePartFromZooKeeper(*it, ops);
|
||||
|
||||
auto it_next = std::next(it);
|
||||
if (ops.size() >= zkutil::MULTI_BATCH_SIZE || it_next == part_names.cend())
|
||||
{
|
||||
Coordination::Responses unused_responses;
|
||||
auto code = zookeeper->tryMultiNoThrow(ops, unused_responses);
|
||||
ops.clear();
|
||||
|
||||
if (code == Coordination::ZNONODE)
|
||||
{
|
||||
/// Fallback
|
||||
LOG_DEBUG(log, "ZooKeeper nodes for some parts in the batch are missing, will remove part nodes one by one");
|
||||
|
||||
for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch)
|
||||
{
|
||||
Coordination::Requests cur_ops;
|
||||
removePartFromZooKeeper(*it_in_batch, cur_ops);
|
||||
auto cur_code = zookeeper->tryMultiNoThrow(cur_ops, unused_responses);
|
||||
|
||||
if (cur_code == Coordination::ZNONODE)
|
||||
{
|
||||
LOG_DEBUG(log, "There is no part " << *it_in_batch << " in ZooKeeper, it was only in filesystem");
|
||||
removePartFromZooKeeper(part_names[i], ops, exists_resp.stat.numChildren > 0);
|
||||
remove_futures.emplace_back(zookeeper->tryAsyncMulti(ops));
|
||||
}
|
||||
else if (parts_should_be_retried && Coordination::isHardwareError(cur_code))
|
||||
else
|
||||
{
|
||||
parts_should_be_retried->emplace(*it_in_batch);
|
||||
}
|
||||
else if (cur_code)
|
||||
{
|
||||
LOG_WARNING(log, "Cannot remove part " << *it_in_batch << " from ZooKeeper: " << zkutil::ZooKeeper::error2string(cur_code));
|
||||
LOG_DEBUG(log,
|
||||
"There is no part " << part_names[i] << " in ZooKeeper, it was only in filesystem");
|
||||
// emplace invalid future so that the total number of futures is the same as part_names.size();
|
||||
remove_futures.emplace_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (parts_should_be_retried && Coordination::isHardwareError(code))
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch)
|
||||
parts_should_be_retried->emplace(*it_in_batch);
|
||||
}
|
||||
else if (code)
|
||||
{
|
||||
LOG_WARNING(log, "There was a problem with deleting " << (it_next - it_first_node_in_batch)
|
||||
<< " nodes from ZooKeeper: " << ::zkutil::ZooKeeper::error2string(code));
|
||||
if (parts_should_be_retried && Coordination::isHardwareError(e.code))
|
||||
parts_should_be_retried->insert(part_names.begin(), part_names.end());
|
||||
throw;
|
||||
}
|
||||
|
||||
it_first_node_in_batch = it_next;
|
||||
for (size_t i = 0; i < remove_futures.size(); ++i)
|
||||
{
|
||||
auto & future = remove_futures[i];
|
||||
|
||||
if (!future.valid())
|
||||
continue;
|
||||
|
||||
auto response = future.get();
|
||||
if (response.error == Coordination::ZOK)
|
||||
continue;
|
||||
else if (response.error == Coordination::ZNONODE)
|
||||
{
|
||||
LOG_DEBUG(log,
|
||||
"There is no part " << part_names[i] << " in ZooKeeper, it was only in filesystem");
|
||||
continue;
|
||||
}
|
||||
else if (Coordination::isHardwareError(response.error))
|
||||
{
|
||||
if (parts_should_be_retried)
|
||||
parts_should_be_retried->insert(part_names[i]);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
LOG_WARNING(log, "Cannot remove part " << part_names[i] << " from ZooKeeper: "
|
||||
<< zkutil::ZooKeeper::error2string(response.error));
|
||||
}
|
||||
}
|
||||
|
||||
@ -4809,6 +4881,16 @@ void StorageReplicatedMergeTree::getCommitPartOps(
|
||||
ops.emplace_back(zkutil::makeCheckRequest(
|
||||
zookeeper_path + "/columns",
|
||||
columns_version));
|
||||
|
||||
if (data.settings.use_minimalistic_part_header_in_zookeeper)
|
||||
{
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
replica_path + "/parts/" + part->name,
|
||||
ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(part->columns, part->checksums).toString(),
|
||||
zkutil::CreateMode::Persistent));
|
||||
}
|
||||
else
|
||||
{
|
||||
ops.emplace_back(zkutil::makeCreateRequest(
|
||||
replica_path + "/parts/" + part->name,
|
||||
"",
|
||||
@ -4821,6 +4903,59 @@ void StorageReplicatedMergeTree::getCommitPartOps(
|
||||
replica_path + "/parts/" + part->name + "/checksums",
|
||||
getChecksumsForZooKeeper(part->checksums),
|
||||
zkutil::CreateMode::Persistent));
|
||||
}
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::updatePartHeaderInZooKeeperAndCommit(
|
||||
const zkutil::ZooKeeperPtr & zookeeper,
|
||||
MergeTreeData::AlterDataPartTransaction & transaction)
|
||||
{
|
||||
String part_path = replica_path + "/parts/" + transaction.getPartName();
|
||||
|
||||
bool need_delete_columns_and_checksums_nodes = false;
|
||||
try
|
||||
{
|
||||
if (data.settings.use_minimalistic_part_header_in_zookeeper)
|
||||
{
|
||||
auto part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
|
||||
transaction.getNewColumns(), transaction.getNewChecksums());
|
||||
Coordination::Stat stat;
|
||||
zookeeper->set(part_path, part_header.toString(), -1, &stat);
|
||||
|
||||
need_delete_columns_and_checksums_nodes = stat.numChildren > 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
Coordination::Requests ops;
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
part_path, String(), -1));
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
part_path + "/columns", transaction.getNewColumns().toString(), -1));
|
||||
ops.emplace_back(zkutil::makeSetRequest(
|
||||
part_path + "/checksums", getChecksumsForZooKeeper(transaction.getNewChecksums()), -1));
|
||||
zookeeper->multi(ops);
|
||||
}
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
/// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally.
|
||||
if (e.code == Coordination::ZNONODE)
|
||||
enqueuePartForCheck(transaction.getPartName());
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
/// Apply file changes.
|
||||
transaction.commit();
|
||||
|
||||
/// Legacy <part_path>/columns and <part_path>/checksums znodes are not needed anymore and can be deleted.
|
||||
if (need_delete_columns_and_checksums_nodes)
|
||||
{
|
||||
Coordination::Requests ops;
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/columns", -1));
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(part_path + "/checksums", -1));
|
||||
zookeeper->multi(ops);
|
||||
}
|
||||
}
|
||||
|
||||
ReplicatedMergeTreeAddress StorageReplicatedMergeTree::getReplicatedMergeTreeAddress() const
|
||||
|
@ -372,8 +372,14 @@ private:
|
||||
MergeTreeData::MutableDataPartPtr & part,
|
||||
const String & block_id_path = "") const;
|
||||
|
||||
/// Updates info about part columns and checksums in ZooKeeper and commits transaction if successful.
|
||||
void updatePartHeaderInZooKeeperAndCommit(
|
||||
const zkutil::ZooKeeperPtr & zookeeper,
|
||||
MergeTreeData::AlterDataPartTransaction & transaction);
|
||||
|
||||
/// Adds actions to `ops` that remove a part from ZooKeeper.
|
||||
void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops);
|
||||
/// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
|
||||
void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children);
|
||||
|
||||
/// Quickly removes big set of parts from ZooKeeper (using async multi queries)
|
||||
void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
|
||||
|
@ -24,3 +24,6 @@ target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse
|
||||
|
||||
add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp)
|
||||
target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper)
|
||||
|
||||
add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp)
|
||||
target_link_libraries (transform_part_zk_nodes PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper)
|
||||
|
130
dbms/src/Storages/tests/transform_part_zk_nodes.cpp
Normal file
130
dbms/src/Storages/tests/transform_part_zk_nodes.cpp
Normal file
@ -0,0 +1,130 @@
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
try
|
||||
{
|
||||
boost::program_options::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("help,h", "produce help message")
|
||||
("address,a", boost::program_options::value<std::string>()->required(),
|
||||
"addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181")
|
||||
("path,p", boost::program_options::value<std::string>()->required(),
|
||||
"where to start")
|
||||
;
|
||||
|
||||
boost::program_options::variables_map options;
|
||||
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
|
||||
|
||||
if (options.count("help"))
|
||||
{
|
||||
std::cout << "Transform contents of part nodes in ZooKeeper to more compact storage scheme." << std::endl;
|
||||
std::cout << "Usage: " << argv[0] << " [options]" << std::endl;
|
||||
std::cout << desc << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
zkutil::ZooKeeper zookeeper(options.at("address").as<std::string>());
|
||||
|
||||
std::string initial_path = options.at("path").as<std::string>();
|
||||
|
||||
struct Node
|
||||
{
|
||||
Node(
|
||||
std::string path_,
|
||||
std::future<Coordination::GetResponse> get_future_,
|
||||
std::future<Coordination::ListResponse> children_future_,
|
||||
Node * parent_)
|
||||
: path(std::move(path_))
|
||||
, get_future(std::move(get_future_))
|
||||
, children_future(std::move(children_future_))
|
||||
, parent(parent_)
|
||||
{
|
||||
}
|
||||
|
||||
std::string path;
|
||||
std::future<Coordination::GetResponse> get_future;
|
||||
std::future<Coordination::ListResponse> children_future;
|
||||
|
||||
Node * parent = nullptr;
|
||||
std::future<Coordination::MultiResponse> set_future;
|
||||
};
|
||||
|
||||
std::list<Node> nodes_queue;
|
||||
nodes_queue.emplace_back(
|
||||
initial_path, zookeeper.asyncGet(initial_path), zookeeper.asyncGetChildren(initial_path), nullptr);
|
||||
|
||||
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
|
||||
{
|
||||
Coordination::GetResponse get_response;
|
||||
Coordination::ListResponse children_response;
|
||||
try
|
||||
{
|
||||
get_response = it->get_future.get();
|
||||
children_response = it->children_future.get();
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
if (e.code == Coordination::ZNONODE)
|
||||
continue;
|
||||
throw;
|
||||
}
|
||||
|
||||
if (get_response.stat.ephemeralOwner)
|
||||
continue;
|
||||
|
||||
if (it->path.find("/parts/") != std::string::npos
|
||||
&& !endsWith(it->path, "/columns")
|
||||
&& !endsWith(it->path, "/checksums"))
|
||||
{
|
||||
if (!children_response.names.empty())
|
||||
{
|
||||
auto part_header = DB::ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(
|
||||
zookeeper.get(it->path + "/columns"), zookeeper.get(it->path + "/checksums"));
|
||||
|
||||
Coordination::Requests ops;
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/columns", -1));
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/checksums", -1));
|
||||
ops.emplace_back(zkutil::makeSetRequest(it->path, part_header.toString(), -1));
|
||||
|
||||
it->set_future = zookeeper.asyncMulti(ops);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto & name : children_response.names)
|
||||
{
|
||||
std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name;
|
||||
nodes_queue.emplace_back(
|
||||
child_path, zookeeper.asyncGet(child_path), zookeeper.asyncGetChildren(child_path),
|
||||
&(*it));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
|
||||
{
|
||||
if (it->set_future.valid())
|
||||
{
|
||||
it->set_future.get();
|
||||
std::cerr << it->path << " changed!" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
|
||||
throw;
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/parseRemoteDescription.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Core/Defines.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -152,7 +153,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
|
||||
if (names.empty())
|
||||
throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
cluster = std::make_shared<Cluster>(context.getSettings(), names, username, password, context.getTCPPort(), false);
|
||||
auto maybe_secure_port = context.getTCPPortSecure();
|
||||
cluster = std::make_shared<Cluster>(context.getSettings(), names, username, password, (secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()), false, secure);
|
||||
}
|
||||
|
||||
auto structure_remote_table = getStructureOfRemoteTable(*cluster, remote_database, remote_table, context, remote_table_function_ptr);
|
||||
@ -177,8 +179,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
|
||||
}
|
||||
|
||||
|
||||
TableFunctionRemote::TableFunctionRemote(const std::string & name_)
|
||||
: name(name_)
|
||||
TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure)
|
||||
: name{name_}, secure{secure}
|
||||
{
|
||||
is_cluster_function = name == "cluster";
|
||||
|
||||
@ -193,6 +195,7 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_)
|
||||
void registerTableFunctionRemote(TableFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("remote", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("remote"); });
|
||||
factory.registerFunction("remoteSecure", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("remote", /* secure = */ true); });
|
||||
factory.registerFunction("cluster", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("cluster"); });
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@ namespace DB
|
||||
class TableFunctionRemote : public ITableFunction
|
||||
{
|
||||
public:
|
||||
explicit TableFunctionRemote(const std::string & name_ = "remote");
|
||||
explicit TableFunctionRemote(const std::string & name_ = "remote", bool secure = false);
|
||||
|
||||
std::string getName() const override { return name; }
|
||||
|
||||
@ -26,6 +26,7 @@ private:
|
||||
std::string name;
|
||||
bool is_cluster_function;
|
||||
std::string help_message;
|
||||
bool secure;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,46 @@
|
||||
<test>
|
||||
<name>Constant column string search</name>
|
||||
|
||||
<tags>
|
||||
<tag>search</tag>
|
||||
</tags>
|
||||
|
||||
<preconditions>
|
||||
<table_exists>hits_100m_single</table_exists>
|
||||
</preconditions>
|
||||
|
||||
<type>loop</type>
|
||||
|
||||
<stop_conditions>
|
||||
<all_of>
|
||||
<iterations>5</iterations>
|
||||
<min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
|
||||
</all_of>
|
||||
<any_of>
|
||||
<iterations>50</iterations>
|
||||
<total_time_ms>60000</total_time_ms>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<query><![CDATA[select count(position(URL, 'yandex')), count(position(URL, 'google')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(multiPosition(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(match(URL, 'yandex|google')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'yahoo')), sum(match(URL, 'pikabu')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'yahoo', 'pikabu'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(match(URL, 'yandex|google|yahoo|pikabu')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'http')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'http'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(match(URL, 'yandex|google|http')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'facebook')), sum(match(URL, 'wikipedia')), sum(match(URL, 'reddit')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'facebook', 'wikipedia', 'reddit'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(match(URL, 'yandex|google|facebook|wikipedia|reddit')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(firstMatch(URL, ['yandex', 'google', 'http', 'facebook', 'google'])) from hits_100m_single]]></query>
|
||||
|
||||
<main_metric>
|
||||
<min_time/>
|
||||
</main_metric>
|
||||
</test>
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
10
dbms/tests/queries/0_stateless/00505_shard_secure.reference
Normal file
10
dbms/tests/queries/0_stateless/00505_shard_secure.reference
Normal file
@ -0,0 +1,10 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
24
dbms/tests/queries/0_stateless/00505_shard_secure.sh
Executable file
24
dbms/tests/queries/0_stateless/00505_shard_secure.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash

# set -x

# Functional test for the remoteSecure() table function.
# It only runs the queries when the server config defines tcp_port_secure;
# otherwise it prints the reference file so the test still passes.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh

# Requires a non-default server config (tcp_port_secure must be set).
# CLICKHOUSE_EXTRACT_CONFIG and CLICKHOUSE_CLIENT are left unquoted on purpose:
# they are expanded as a command line.
tcp_port_secure=$($CLICKHOUSE_EXTRACT_CONFIG -k tcp_port_secure 2>/dev/null)

# Quoted so that an empty or whitespace-containing value is still a single argument to `[`.
if [ -z "$tcp_port_secure" ]; then
    # Secure port disabled. Fake result
    cat "$CURDIR"/00505_shard_secure.reference
else

    $CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure('127.0.0.{1,2}', system.one);"
    $CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure('127.0.0.{1,2}:$CLICKHOUSE_PORT_TCP_SECURE', system.one);"
    $CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure('127.0.0.{1,2}', system.one);"
    $CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure(test_shard_localhost_secure, system.one);"
    $CLICKHOUSE_CLIENT -q "SELECT * FROM remote(test_shard_localhost_secure, system.one);"
    $CLICKHOUSE_CLIENT -q "SELECT * FROM remoteSecure(test_shard_localhost, system.one);"
    $CLICKHOUSE_CLIENT -q "SELECT * FROM remote(test_shard_localhost, system.one);"

fi
|
@ -0,0 +1,35 @@
|
||||
*** Test fetches ***
|
||||
*** replica 1 ***
|
||||
1 1
|
||||
2 2
|
||||
*** replica 2 ***
|
||||
1 1
|
||||
2 2
|
||||
*** Test merges ***
|
||||
*** replica 1 ***
|
||||
all_0_1_1 1
|
||||
all_0_1_1 2
|
||||
*** replica 2 ***
|
||||
all_0_1_1 1
|
||||
all_0_1_1 2
|
||||
*** Test part removal ***
|
||||
*** replica 1 ***
|
||||
all_0_1_1
|
||||
all_0_1_1
|
||||
*** replica 2 ***
|
||||
all_0_1_1
|
||||
all_0_1_1
|
||||
*** Test ALTER ***
|
||||
*** replica 1 ***
|
||||
1 1
|
||||
2 1
|
||||
*** replica 2 ***
|
||||
1 1
|
||||
2 1
|
||||
*** Test CLEAR COLUMN ***
|
||||
*** replica 1 ***
|
||||
1 0
|
||||
2 0
|
||||
*** replica 2 ***
|
||||
1 0
|
||||
2 0
|
@ -0,0 +1,61 @@
|
||||
DROP TABLE IF EXISTS test.part_header_r1;
|
||||
DROP TABLE IF EXISTS test.part_header_r2;
|
||||
|
||||
CREATE TABLE test.part_header_r1(x UInt32, y UInt32)
|
||||
ENGINE ReplicatedMergeTree('/clickhouse/tables/test/part_header', '1') ORDER BY x
|
||||
SETTINGS use_minimalistic_part_header_in_zookeeper = 0,
|
||||
old_parts_lifetime = 1,
|
||||
cleanup_delay_period = 0,
|
||||
cleanup_delay_period_random_add = 0;
|
||||
CREATE TABLE test.part_header_r2(x UInt32, y UInt32)
|
||||
ENGINE ReplicatedMergeTree('/clickhouse/tables/test/part_header', '2') ORDER BY x
|
||||
SETTINGS use_minimalistic_part_header_in_zookeeper = 1,
|
||||
old_parts_lifetime = 1,
|
||||
cleanup_delay_period = 0,
|
||||
cleanup_delay_period_random_add = 0;
|
||||
|
||||
SELECT '*** Test fetches ***';
|
||||
INSERT INTO test.part_header_r1 VALUES (1, 1);
|
||||
INSERT INTO test.part_header_r2 VALUES (2, 2);
|
||||
SYSTEM SYNC REPLICA test.part_header_r1;
|
||||
SYSTEM SYNC REPLICA test.part_header_r2;
|
||||
SELECT '*** replica 1 ***';
|
||||
SELECT x, y FROM test.part_header_r1 ORDER BY x;
|
||||
SELECT '*** replica 2 ***';
|
||||
SELECT x, y FROM test.part_header_r2 ORDER BY x;
|
||||
|
||||
SELECT '*** Test merges ***';
|
||||
OPTIMIZE TABLE test.part_header_r1;
|
||||
SYSTEM SYNC REPLICA test.part_header_r2;
|
||||
SELECT '*** replica 1 ***';
|
||||
SELECT _part, x FROM test.part_header_r1 ORDER BY x;
|
||||
SELECT '*** replica 2 ***';
|
||||
SELECT _part, x FROM test.part_header_r2 ORDER BY x;
|
||||
|
||||
SELECT sleep(2) FORMAT Null;
|
||||
|
||||
SELECT '*** Test part removal ***';
|
||||
SELECT '*** replica 1 ***';
|
||||
SELECT name FROM system.parts WHERE database = 'test' AND table = 'part_header_r1';
|
||||
SELECT name FROM system.zookeeper WHERE path = '/clickhouse/tables/test/part_header/replicas/1/parts';
|
||||
SELECT '*** replica 2 ***';
|
||||
SELECT name FROM system.parts WHERE database = 'test' AND table = 'part_header_r2';
|
||||
-- Check the ZooKeeper part list of replica 2 (this line sits in the "replica 2" section; it previously re-queried replica 1).
SELECT name FROM system.zookeeper WHERE path = '/clickhouse/tables/test/part_header/replicas/2/parts';
|
||||
|
||||
SELECT '*** Test ALTER ***';
|
||||
ALTER TABLE test.part_header_r1 MODIFY COLUMN y String;
|
||||
SELECT '*** replica 1 ***';
|
||||
SELECT x, length(y) FROM test.part_header_r1 ORDER BY x;
|
||||
SELECT '*** replica 2 ***';
|
||||
SELECT x, length(y) FROM test.part_header_r2 ORDER BY x;
|
||||
|
||||
SELECT '*** Test CLEAR COLUMN ***';
|
||||
SET replication_alter_partitions_sync = 2;
|
||||
ALTER TABLE test.part_header_r1 CLEAR COLUMN y IN PARTITION tuple();
|
||||
SELECT '*** replica 1 ***';
|
||||
SELECT x, length(y) FROM test.part_header_r1 ORDER BY x;
|
||||
SELECT '*** replica 2 ***';
|
||||
SELECT x, length(y) FROM test.part_header_r2 ORDER BY x;
|
||||
|
||||
DROP TABLE test.part_header_r1;
|
||||
DROP TABLE test.part_header_r2;
|
@ -0,0 +1 @@
|
||||
1 2 5 3 4 7 6
|
59
dbms/tests/queries/0_stateless/00823_capnproto_input.sh
Executable file
59
dbms/tests/queries/0_stateless/00823_capnproto_input.sh
Executable file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. $CURDIR/../shell_config.sh
|
||||
|
||||
#create the schema file
|
||||
echo "
|
||||
@0x803231eaa402b968;
|
||||
struct NestedNestedOne
|
||||
{
|
||||
nestednestednumber @0 : UInt64;
|
||||
}
|
||||
struct NestedNestedTwo
|
||||
{
|
||||
nestednestedtext @0 : Text;
|
||||
}
|
||||
struct NestedOne
|
||||
{
|
||||
nestednestedone @0 : NestedNestedOne;
|
||||
nestednestedtwo @1 : NestedNestedTwo;
|
||||
nestednumber @2: UInt64;
|
||||
}
|
||||
struct NestedTwo
|
||||
{
|
||||
nestednestedone @0 : NestedNestedOne;
|
||||
nestednestedtwo @1 : NestedNestedTwo;
|
||||
nestedtext @2 : Text;
|
||||
}
|
||||
struct CapnProto
|
||||
{
|
||||
number @0 : UInt64;
|
||||
string @1 : Text;
|
||||
nestedone @2 : NestedOne;
|
||||
nestedtwo @3 : NestedTwo;
|
||||
nestedthree @4 : NestedNestedTwo;
|
||||
}" > test.capnp
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.capnproto_input"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE test.capnproto_input
|
||||
(
|
||||
number UInt64,
|
||||
string String,
|
||||
nestedone_nestednumber UInt64,
|
||||
nestedone_nestednestedone_nestednestednumber UInt64,
|
||||
nestedone_nestednestedtwo_nestednestedtext String,
|
||||
nestedtwo_nestednestedtwo_nestednestedtext String,
|
||||
nestedtwo_nestednestedone_nestednestednumber UInt64,
|
||||
nestedtwo_nestedtext String
|
||||
) ENGINE = Memory"
|
||||
|
||||
echo -ne '\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x01\x00\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x12\x00\x00\x00\x0c\x00\x00\x00\x01\x00\x02\x00\x20\x00\x00\x00\x00\x00\x03\x00\x34\x00\x00\x00\x00\x00\x01\x00\x32\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x34\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x01\x00\x00\x00\x08\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x37\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x39\x00\x00\x00\x00\x00\x00\x00' | $CLICKHOUSE_CLIENT --stacktrace --format_schema='test:CapnProto' --query="INSERT INTO test.capnproto_input FORMAT CapnProto";
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM test.capnproto_input"
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE test.capnproto_input"
|
||||
|
||||
# remove the schema file
|
||||
rm test.capnp
|
@ -0,0 +1 @@
|
||||
1
|
25
dbms/tests/queries/0_stateless/00823_sequence_match_dfa.sql
Normal file
25
dbms/tests/queries/0_stateless/00823_sequence_match_dfa.sql
Normal file
@ -0,0 +1,25 @@
|
||||
-- this test cannot pass without the new DFA matching algorithm of sequenceMatch
|
||||
|
||||
DROP TABLE IF EXISTS test.sequence;
|
||||
|
||||
CREATE TABLE test.sequence
|
||||
(
|
||||
userID UInt64,
|
||||
eventType Enum8('A' = 1, 'B' = 2, 'C' = 3),
|
||||
EventTime UInt64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO test.sequence SELECT 1, number = 0 ? 'A' : (number < 1000000 ? 'B' : 'C'), number FROM numbers(1000001);
|
||||
|
||||
SELECT userID
|
||||
FROM test.sequence
|
||||
GROUP BY userID
|
||||
HAVING sequenceMatch('(?1).*(?2).*(?3)')(toDateTime(EventTime), eventType = 'A', eventType = 'B', eventType = 'C');
|
||||
|
||||
SELECT userID
|
||||
FROM test.sequence
|
||||
GROUP BY userID
|
||||
HAVING sequenceMatch('(?1).*(?2).*(?3)')(toDateTime(EventTime), eventType = 'A', eventType = 'B', eventType = 'A');
|
||||
|
||||
DROP TABLE test.sequence;
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@ -1,5 +1,5 @@
|
||||
clickhouse (19.1.0) unstable; urgency=low
|
||||
clickhouse (19.1.1) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- <root@yandex-team.ru> Tue, 01 Jan 2019 07:16:20 +0300
|
||||
-- <root@yandex-team.ru> Wed, 16 Jan 2019 14:04:37 +0300
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
|
||||
ARG version=19.1.0
|
||||
ARG version=19.1.1
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
|
||||
ARG version=19.1.0
|
||||
ARG version=19.1.1
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
RUN apt-get update \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
|
||||
ARG version=19.1.0
|
||||
ARG version=19.1.1
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -18,7 +18,8 @@ RUN apt-get update -y \
|
||||
sudo \
|
||||
openssl \
|
||||
netcat-openbsd \
|
||||
telnet
|
||||
telnet \
|
||||
moreutils
|
||||
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
@ -29,8 +30,14 @@ COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml
|
||||
COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml
|
||||
|
||||
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-server_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-client_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-test_*.deb; \
|
||||
echo "TSAN_OPTIONS='halt_on_error=1'" >> /etc/environment; \
|
||||
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
|
||||
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
service zookeeper start; sleep 5; \
|
||||
service clickhouse-server start && sleep 5 && clickhouse-test --shard --zookeeper $SKIP_TESTS_OPTION 2>&1 | tee test_output/test_result.txt
|
||||
service clickhouse-server start && sleep 5 && clickhouse-test --shard --zookeeper $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM ubuntu:18.10
|
||||
FROM yandex/clickhouse-deb-builder
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
@ -24,7 +24,14 @@ COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml
|
||||
COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml
|
||||
|
||||
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-server_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-client_*.deb; \
|
||||
dpkg -i package_folder/clickhouse-test_*.deb; \
|
||||
echo "TSAN_OPTIONS='halt_on_error=1'" >> /etc/environment; \
|
||||
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
|
||||
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
|
||||
service clickhouse-server start && sleep 1 && ./stress --output-folder test_output
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
# remote
|
||||
# remote, remoteSecure
|
||||
|
||||
Allows you to access remote servers without creating a `Distributed` table.
|
||||
|
||||
@ -72,5 +72,6 @@ The `remote` table function can be useful in the following cases:
|
||||
If the user is not specified, `default` is used.
|
||||
If the password is not specified, an empty password is used.
|
||||
|
||||
`remoteSecure` - same as `remote`, but over a secure connection. Default port - `tcp_port_secure` from the config, or 9440 if it is not set.
|
||||
|
||||
[Original article](https://clickhouse.yandex/docs/en/query_language/table_functions/remote/) <!--hide-->
|
||||
|
@ -50,7 +50,7 @@ sudo apt-get install clickhouse-client clickhouse-server
|
||||
|
||||
### Из исходного кода
|
||||
|
||||
Для компиляции ClickHouse вручную, испольщуйте инструкцию для [Linux](../development/build.md) или [Mac OS X](../development/build_osx.md).
|
||||
Для компиляции ClickHouse вручную, используйте инструкцию для [Linux](../development/build.md) или [Mac OS X](../development/build_osx.md).
|
||||
|
||||
Можно скомпилировать пакеты и установить их, либо использовать программы без установки пакетов. Также при ручной сборке можно отключить необходимость поддержки набора инструкций SSE 4.2 или собрать под процессоры архитектуры AArch64.
|
||||
|
||||
@ -97,7 +97,7 @@ $ clickhouse-client
|
||||
|
||||
По умолчанию он соединяется с localhost:9000, от имени пользователя `default` без пароля. Также клиент может быть использован для соединения с удалённым сервером с помощью аргумента `--host`.
|
||||
|
||||
Терминал должен использлвать кодировку UTF-8.
|
||||
Терминал должен использовать кодировку UTF-8.
|
||||
|
||||
Более подробная информация о клиенте располагается в разделе [«Клиент командной строки»](../interfaces/cli.md).
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
# remote
|
||||
# remote, remoteSecure
|
||||
|
||||
Позволяет обратиться к удалённым серверам без создания таблицы типа `Distributed`.
|
||||
|
||||
@ -72,4 +72,6 @@ example01-{01..02}-{1|2}
|
||||
Если пользователь не задан, то используется `default`.
|
||||
Если пароль не задан, то используется пустой пароль.
|
||||
|
||||
`remoteSecure` - аналогично функции `remote`, но с соединением по шифрованному каналу. Порт по умолчанию - `tcp_port_secure` из конфига или 9440.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/table_functions/remote/) <!--hide-->
|
||||
|
@ -21,6 +21,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
|
||||
add_subdirectory (corrector_utf8)
|
||||
add_subdirectory (zookeeper-cli)
|
||||
add_subdirectory (zookeeper-dump-tree)
|
||||
add_subdirectory (zookeeper-copy-tree)
|
||||
add_subdirectory (zookeeper-remove-by-list)
|
||||
add_subdirectory (zookeeper-create-entry-to-download-part)
|
||||
add_subdirectory (wikistat-loader)
|
||||
|
2
utils/zookeeper-copy-tree/CMakeLists.txt
Normal file
2
utils/zookeeper-copy-tree/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
||||
add_executable (zookeeper-copy-tree main.cpp ${SRCS})
|
||||
target_link_libraries(zookeeper-copy-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY})
|
149
utils/zookeeper-copy-tree/main.cpp
Normal file
149
utils/zookeeper-copy-tree/main.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
|
||||
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
try
|
||||
{
|
||||
boost::program_options::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("help,h", "produce help message")
|
||||
("from", boost::program_options::value<std::string>()->required(),
|
||||
"addresses of source ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181")
|
||||
("from-path", boost::program_options::value<std::string>()->required(),
|
||||
"where to copy from")
|
||||
("to", boost::program_options::value<std::string>()->required(),
|
||||
"addresses of destination ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181")
|
||||
("to-path", boost::program_options::value<std::string>()->required(),
|
||||
"where to copy to")
|
||||
;
|
||||
|
||||
boost::program_options::variables_map options;
|
||||
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
|
||||
|
||||
if (options.count("help"))
|
||||
{
|
||||
std::cout << "Copy a ZooKeeper tree to another cluster." << std::endl;
|
||||
std::cout << "Usage: " << argv[0] << " [options]" << std::endl;
|
||||
std::cout << "WARNING: it is almost useless as it is impossible to corretly copy sequential nodes" << std::endl;
|
||||
std::cout << desc << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
zkutil::ZooKeeper from_zookeeper(options.at("from").as<std::string>());
|
||||
zkutil::ZooKeeper to_zookeeper(options.at("to").as<std::string>());
|
||||
|
||||
std::string from_path = options.at("from-path").as<std::string>();
|
||||
std::string to_path = options.at("to-path").as<std::string>();
|
||||
|
||||
if (to_zookeeper.exists(to_path))
|
||||
throw DB::Exception("Destination path: " + to_path + " already exists, aborting.",
|
||||
DB::ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER);
|
||||
|
||||
struct Node
|
||||
{
|
||||
Node(
|
||||
std::string path_,
|
||||
std::future<Coordination::GetResponse> get_future_,
|
||||
std::future<Coordination::ListResponse> children_future_,
|
||||
Node * parent_)
|
||||
: path(std::move(path_))
|
||||
, get_future(std::move(get_future_))
|
||||
, children_future(std::move(children_future_))
|
||||
, parent(parent_)
|
||||
{
|
||||
}
|
||||
|
||||
std::string path;
|
||||
std::future<Coordination::GetResponse> get_future;
|
||||
std::future<Coordination::ListResponse> children_future;
|
||||
|
||||
Node * parent = nullptr;
|
||||
std::future<Coordination::CreateResponse> create_future;
|
||||
bool created = false;
|
||||
bool deleted = false;
|
||||
bool ephemeral = false;
|
||||
};
|
||||
|
||||
std::list<Node> nodes_queue;
|
||||
nodes_queue.emplace_back(
|
||||
from_path, from_zookeeper.asyncGet(from_path), from_zookeeper.asyncGetChildren(from_path), nullptr);
|
||||
|
||||
to_zookeeper.createAncestors(to_path);
|
||||
|
||||
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
|
||||
{
|
||||
Coordination::GetResponse get_response;
|
||||
Coordination::ListResponse children_response;
|
||||
try
|
||||
{
|
||||
get_response = it->get_future.get();
|
||||
children_response = it->children_future.get();
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
if (e.code == Coordination::ZNONODE)
|
||||
{
|
||||
it->deleted = true;
|
||||
continue;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
if (get_response.stat.ephemeralOwner)
|
||||
{
|
||||
it->ephemeral = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->parent && !it->parent->created)
|
||||
{
|
||||
it->parent->create_future.get();
|
||||
it->parent->created = true;
|
||||
std::cerr << it->parent->path << " copied!" << std::endl;
|
||||
}
|
||||
|
||||
std::string new_path = it->path;
|
||||
new_path.replace(0, from_path.length(), to_path);
|
||||
it->create_future = to_zookeeper.asyncCreate(new_path, get_response.data, zkutil::CreateMode::Persistent);
|
||||
get_response.data.clear();
|
||||
get_response.data.shrink_to_fit();
|
||||
|
||||
for (const auto & name : children_response.names)
|
||||
{
|
||||
std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name;
|
||||
nodes_queue.emplace_back(
|
||||
child_path, from_zookeeper.asyncGet(child_path), from_zookeeper.asyncGetChildren(child_path),
|
||||
&(*it));
|
||||
}
|
||||
}
|
||||
|
||||
for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it)
|
||||
{
|
||||
if (!it->created && !it->deleted && !it->ephemeral)
|
||||
{
|
||||
it->create_future.get();
|
||||
it->created = true;
|
||||
std::cerr << it->path << " copied!" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
|
||||
throw;
|
||||
}
|
@ -192,6 +192,8 @@
|
||||
rel="external nofollow" target="_blank">Column Store Database Benchmarks</a> by Percona</li>
|
||||
<li><a href="http://tech.marksblogg.com/billion-nyc-taxi-clickhouse.html"
|
||||
rel="external nofollow" target="_blank">1.1 Billion Taxi Rides on ClickHouse & an Intel Core i5</a> by Mark Litwintschik</li>
|
||||
<li><a href="https://tech.marksblogg.com/billion-nyc-taxi-rides-clickhouse-cluster.html"
|
||||
rel="external nofollow" target="_blank">1.1 Billion Taxi Rides: 108-core ClickHouse Cluster</a> by Mark Litwintschik</li>
|
||||
<li><a href="https://www.altinity.com/blog/2017/6/20/clickhouse-vs-redshift"
|
||||
rel="external nofollow" target="_blank">ClickHouse vs Amazon RedShift Benchmark</a> by Altinity</li>
|
||||
<li><a href="https://carto.com/blog/inside/geospatial-processing-with-clickhouse"
|
||||
|
Loading…
Reference in New Issue
Block a user