mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge branch 'master' into fix-substr-zk-metadata
This commit is contained in:
commit
d8bea598b2
6
.gitmodules
vendored
6
.gitmodules
vendored
@ -217,6 +217,9 @@
|
||||
[submodule "contrib/yaml-cpp"]
|
||||
path = contrib/yaml-cpp
|
||||
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
|
||||
[submodule "contrib/cld2"]
|
||||
path = contrib/cld2
|
||||
url = https://github.com/ClickHouse-Extras/cld2.git
|
||||
[submodule "contrib/libstemmer_c"]
|
||||
path = contrib/libstemmer_c
|
||||
url = https://github.com/ClickHouse-Extras/libstemmer_c.git
|
||||
@ -247,6 +250,9 @@
|
||||
[submodule "contrib/sysroot"]
|
||||
path = contrib/sysroot
|
||||
url = https://github.com/ClickHouse-Extras/sysroot.git
|
||||
[submodule "contrib/nlp-data"]
|
||||
path = contrib/nlp-data
|
||||
url = https://github.com/ClickHouse-Extras/nlp-data.git
|
||||
[submodule "contrib/hive-metastore"]
|
||||
path = contrib/hive-metastore
|
||||
url = https://github.com/ClickHouse-Extras/hive-metastore
|
||||
|
@ -247,8 +247,6 @@ endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
set(USE_DEBUG_HELPERS ON)
|
||||
else ()
|
||||
set(USE_DEBUG_HELPERS ON)
|
||||
endif()
|
||||
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
|
||||
|
||||
@ -403,17 +401,6 @@ else ()
|
||||
option(WERROR "Enable -Werror compiler option" ON)
|
||||
endif ()
|
||||
|
||||
if (WERROR)
|
||||
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
|
||||
# Instead, adopt modern cmake usage requirement.
|
||||
target_compile_options(global-libs INTERFACE "-Werror")
|
||||
endif ()
|
||||
|
||||
# Make this extra-checks for correct library dependencies.
|
||||
if (OS_LINUX AND NOT SANITIZE)
|
||||
target_link_options(global-libs INTERFACE "-Wl,--no-undefined")
|
||||
endif ()
|
||||
|
||||
# Increase stack size on Musl. We need a big stack for our recursive-descent parser.
|
||||
if (USE_MUSL)
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152")
|
||||
@ -421,6 +408,7 @@ endif ()
|
||||
|
||||
include(cmake/dbms_glob_sources.cmake)
|
||||
|
||||
add_library(global-group INTERFACE)
|
||||
if (OS_LINUX OR OS_ANDROID)
|
||||
include(cmake/linux/default_libs.cmake)
|
||||
elseif (OS_DARWIN)
|
||||
@ -428,6 +416,18 @@ elseif (OS_DARWIN)
|
||||
elseif (OS_FREEBSD)
|
||||
include(cmake/freebsd/default_libs.cmake)
|
||||
endif ()
|
||||
link_libraries(global-group)
|
||||
|
||||
if (WERROR)
|
||||
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
|
||||
# Instead, adopt modern cmake usage requirement.
|
||||
target_compile_options(global-group INTERFACE "-Werror")
|
||||
endif ()
|
||||
|
||||
# Make this extra-checks for correct library dependencies.
|
||||
if (OS_LINUX AND NOT SANITIZE)
|
||||
target_link_options(global-group INTERFACE "-Wl,--no-undefined")
|
||||
endif ()
|
||||
|
||||
######################################
|
||||
### Add targets below this comment ###
|
||||
|
4
LICENSE
4
LICENSE
@ -1,4 +1,4 @@
|
||||
Copyright 2016-2021 ClickHouse, Inc.
|
||||
Copyright 2016-2022 ClickHouse, Inc.
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
@ -188,7 +188,7 @@ Copyright 2016-2021 ClickHouse, Inc.
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2016-2021 ClickHouse, Inc.
|
||||
Copyright 2016-2022 ClickHouse, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
@ -2,7 +2,9 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <string_view>
|
||||
#include <algorithm>
|
||||
|
||||
#include <cassert>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/select.h>
|
||||
@ -34,13 +36,37 @@ bool hasInputData()
|
||||
return select(1, &fds, nullptr, nullptr, &timeout) == 1;
|
||||
}
|
||||
|
||||
/// Case-insensitive "less than" comparator for strings.
/// It is the ordering used for the case-insensitive word list (sorting and merging).
struct NoCaseCompare
{
    /// Returns true when str1 precedes str2, comparing byte by byte after lower-casing each byte.
    /// A string that is a proper prefix of another one precedes it.
    bool operator()(const std::string & str1, const std::string & str2) const
    {
        /// The bytes are taken as unsigned char on purpose: passing a negative value
        /// (any byte >= 0x80 where plain char is signed) to std::tolower is undefined behaviour.
        /// Unsigned comparison is also how strncasecmp orders such bytes.
        return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const unsigned char c1, const unsigned char c2)
        {
            return std::tolower(c1) < std::tolower(c2);
        });
    }
};
|
||||
|
||||
using Words = std::vector<std::string>;

/// Merges `from` into `to`, keeping `to` ordered by `comp` and free of exact duplicates.
///
/// Preconditions: both `to` and `from` are already sorted by `comp`.
/// Strings that are equivalent under `comp` but not byte-identical (e.g. "a" and "A" for a
/// case-insensitive comparator) are all kept; only byte-identical strings are collapsed.
template <class Compare>
void addNewWords(Words & to, const Words & from, Compare comp)
{
    size_t old_size = to.size();
    size_t new_size = old_size + from.size();

    to.reserve(new_size);
    to.insert(to.end(), from.begin(), from.end());
    auto middle = to.begin() + old_size;
    std::inplace_merge(to.begin(), middle, to.end(), comp);

    /// std::unique removes only adjacent equal elements, while the range is ordered by `comp`.
    /// Inside a run of `comp`-equivalent words identical strings may be separated by other
    /// variants ("a", "A", "a"), so order every such run byte-wise first. Reordering inside a
    /// run of equivalent elements does not break the ordering by `comp`.
    for (auto group_begin = to.begin(); group_begin != to.end();)
    {
        auto group_end = std::find_if(group_begin + 1, to.end(), [&](const std::string & word)
        {
            return comp(*group_begin, word);
        });

        if (group_end - group_begin > 1)
            std::sort(group_begin, group_end);

        group_begin = group_end;
    }

    auto last_unique = std::unique(to.begin(), to.end());
    to.erase(last_unique, to.end());
}
|
||||
|
||||
std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const
|
||||
{
|
||||
if (!ready)
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length)
|
||||
{
|
||||
std::string_view last_word;
|
||||
|
||||
auto last_word_pos = prefix.find_last_of(word_break_characters);
|
||||
@ -48,21 +74,45 @@ std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletio
|
||||
last_word = prefix;
|
||||
else
|
||||
last_word = std::string_view(prefix).substr(last_word_pos + 1, std::string::npos);
|
||||
|
||||
/// last_word can be empty.
|
||||
|
||||
std::pair<Words::const_iterator, Words::const_iterator> range;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
/// Only perform case sensitive completion when the prefix string contains any uppercase characters
|
||||
if (std::none_of(prefix.begin(), prefix.end(), [&](auto c) { return c >= 'A' && c <= 'Z'; }))
|
||||
return std::equal_range(
|
||||
if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast<wint_t>(x)); }))
|
||||
range = std::equal_range(
|
||||
words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
|
||||
{
|
||||
return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0;
|
||||
});
|
||||
else
|
||||
return std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
|
||||
range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
|
||||
{
|
||||
return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0;
|
||||
});
|
||||
|
||||
return replxx::Replxx::completions_t(range.first, range.second);
|
||||
}
|
||||
|
||||
/// Adds `new_words` to both suggestion lists: the case-sensitive one (`words`) and the
/// case-insensitive one (`words_no_case`). The incoming words are sorted here, so the
/// caller may pass them in any order.
void LineReader::Suggest::addWords(Words && new_words)
{
    /// Each list has its own ordering, so a separately sorted copy is needed for the second one.
    Words new_words_no_case = new_words;
    if (!new_words.empty())
    {
        std::sort(new_words.begin(), new_words.end());
        std::sort(new_words_no_case.begin(), new_words_no_case.end(), NoCaseCompare{});
    }

    /// Sorting above is done without the lock; only the merge into the shared lists is guarded.
    std::lock_guard lock(mutex);
    addNewWords(words, new_words, std::less<std::string>{});
    addNewWords(words_no_case, new_words_no_case, NoCaseCompare{});

    /// The sanity checks read the shared lists, so they must run while the mutex is still held.
    assert(std::is_sorted(words.begin(), words.end()));
    assert(std::is_sorted(words_no_case.begin(), words_no_case.end(), NoCaseCompare{}));
}
|
||||
|
||||
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
|
||||
|
@ -1,10 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
#include <replxx.hxx>
|
||||
|
||||
#include <base/types.h>
|
||||
|
||||
class LineReader
|
||||
{
|
||||
@ -12,14 +14,16 @@ public:
|
||||
struct Suggest
|
||||
{
|
||||
using Words = std::vector<std::string>;
|
||||
using WordsRange = std::pair<Words::const_iterator, Words::const_iterator>;
|
||||
|
||||
/// Get vector for the matched range of words if any.
|
||||
replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length);
|
||||
void addWords(Words && new_words);
|
||||
|
||||
private:
|
||||
Words words;
|
||||
Words words_no_case;
|
||||
std::atomic<bool> ready{false};
|
||||
|
||||
/// Get iterators for the matched range of words if any.
|
||||
std::optional<WordsRange> getCompletions(const String & prefix, size_t prefix_length) const;
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
using Patterns = std::vector<const char *>;
|
||||
|
@ -25,13 +25,6 @@ void trim(String & s)
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
|
||||
}
|
||||
|
||||
/// Check if string ends with given character after skipping whitespaces.
|
||||
bool ends_with(const std::string_view & s, const std::string_view & p)
|
||||
{
|
||||
auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }));
|
||||
return ss.ends_with(p);
|
||||
}
|
||||
|
||||
std::string getEditor()
|
||||
{
|
||||
const char * editor = std::getenv("EDITOR");
|
||||
@ -132,8 +125,14 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
|
||||
|
||||
}
|
||||
|
||||
static bool replxx_last_is_delimiter = false;
|
||||
void ReplxxLineReader::setLastIsDelimiter(bool flag)
|
||||
{
|
||||
replxx_last_is_delimiter = flag;
|
||||
}
|
||||
|
||||
ReplxxLineReader::ReplxxLineReader(
|
||||
const Suggest & suggest,
|
||||
Suggest & suggest,
|
||||
const String & history_file_path_,
|
||||
bool multiline_,
|
||||
Patterns extenders_,
|
||||
@ -179,14 +178,13 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
|
||||
auto callback = [&suggest] (const String & context, size_t context_size)
|
||||
{
|
||||
if (auto range = suggest.getCompletions(context, context_size))
|
||||
return Replxx::completions_t(range->first, range->second);
|
||||
return Replxx::completions_t();
|
||||
return suggest.getCompletions(context, context_size);
|
||||
};
|
||||
|
||||
rx.set_completion_callback(callback);
|
||||
rx.set_complete_on_empty(false);
|
||||
rx.set_word_break_characters(word_break_characters);
|
||||
rx.set_ignore_case(true);
|
||||
|
||||
if (highlighter)
|
||||
rx.set_highlighter_callback(highlighter);
|
||||
@ -198,21 +196,11 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
|
||||
auto commit_action = [this](char32_t code)
|
||||
{
|
||||
std::string_view str = rx.get_state().text();
|
||||
|
||||
/// Always commit line when we see extender at the end. It will start a new prompt.
|
||||
for (const auto * extender : extenders)
|
||||
if (ends_with(str, extender))
|
||||
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
|
||||
|
||||
/// If we see an delimiter at the end, commit right away.
|
||||
for (const auto * delimiter : delimiters)
|
||||
if (ends_with(str, delimiter))
|
||||
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
|
||||
|
||||
/// If we allow multiline and there is already something in the input, start a newline.
|
||||
if (multiline && !input.empty())
|
||||
/// NOTE: Lexer is only available if we use highlighter.
|
||||
if (highlighter && multiline && !replxx_last_is_delimiter)
|
||||
return rx.invoke(Replxx::ACTION::NEW_LINE, code);
|
||||
replxx_last_is_delimiter = false;
|
||||
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
|
||||
};
|
||||
/// bind C-j to ENTER action.
|
||||
|
@ -9,7 +9,7 @@ class ReplxxLineReader : public LineReader
|
||||
{
|
||||
public:
|
||||
ReplxxLineReader(
|
||||
const Suggest & suggest,
|
||||
Suggest & suggest,
|
||||
const String & history_file_path,
|
||||
bool multiline,
|
||||
Patterns extenders_,
|
||||
@ -19,6 +19,9 @@ public:
|
||||
|
||||
void enableBracketedPaste() override;
|
||||
|
||||
/// If highlight is on, we will set a flag to denote whether the last token is a delimiter.
|
||||
/// This is useful to determine the behavior of <ENTER> key when multiline is enabled.
|
||||
static void setLastIsDelimiter(bool flag);
|
||||
private:
|
||||
InputStatus readOneLine(const String & prompt) override;
|
||||
void addToHistory(const String & line) override;
|
||||
|
@ -1,26 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include <pdqsort.h>
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
|
||||
#include <miniselect/floyd_rivest_select.h>
|
||||
|
||||
template <class RandomIt>
|
||||
template <typename RandomIt>
|
||||
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
|
||||
{
|
||||
::miniselect::floyd_rivest_select(first, nth, last);
|
||||
}
|
||||
|
||||
template <class RandomIt>
|
||||
template <typename RandomIt>
|
||||
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
|
||||
{
|
||||
::miniselect::floyd_rivest_partial_sort(first, middle, last);
|
||||
}
|
||||
|
||||
template <class RandomIt, class Compare>
|
||||
template <typename RandomIt, typename Compare>
|
||||
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
|
||||
{
|
||||
::miniselect::floyd_rivest_partial_sort(first, middle, last, compare);
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
template <typename RandomIt, typename Compare>
|
||||
void sort(RandomIt first, RandomIt last, Compare compare)
|
||||
{
|
||||
::pdqsort(first, last, compare);
|
||||
}
|
||||
|
||||
template <typename RandomIt>
|
||||
void sort(RandomIt first, RandomIt last)
|
||||
{
|
||||
using value_type = typename std::iterator_traits<RandomIt>::value_type;
|
||||
using comparator = std::less<value_type>;
|
||||
::pdqsort(first, last, comparator());
|
||||
}
|
||||
|
@ -24,14 +24,10 @@ find_package(Threads REQUIRED)
|
||||
|
||||
include (cmake/find/cxx.cmake)
|
||||
|
||||
add_library(global-group INTERFACE)
|
||||
|
||||
target_link_libraries(global-group INTERFACE
|
||||
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
|
||||
)
|
||||
|
||||
link_libraries(global-group)
|
||||
|
||||
# FIXME: remove when all contribs will get custom cmake lists
|
||||
install(
|
||||
TARGETS global-group global-libs
|
||||
|
@ -25,14 +25,10 @@ find_package(Threads REQUIRED)
|
||||
include (cmake/find/unwind.cmake)
|
||||
include (cmake/find/cxx.cmake)
|
||||
|
||||
add_library(global-group INTERFACE)
|
||||
|
||||
target_link_libraries(global-group INTERFACE
|
||||
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
|
||||
)
|
||||
|
||||
link_libraries(global-group)
|
||||
|
||||
# FIXME: remove when all contribs will get custom cmake lists
|
||||
install(
|
||||
TARGETS global-group global-libs
|
||||
|
@ -45,15 +45,12 @@ endif ()
|
||||
include (cmake/find/unwind.cmake)
|
||||
include (cmake/find/cxx.cmake)
|
||||
|
||||
add_library(global-group INTERFACE)
|
||||
target_link_libraries(global-group INTERFACE
|
||||
-Wl,--start-group
|
||||
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
|
||||
-Wl,--end-group
|
||||
)
|
||||
|
||||
link_libraries(global-group)
|
||||
|
||||
# FIXME: remove when all contribs will get custom cmake lists
|
||||
install(
|
||||
TARGETS global-group global-libs
|
||||
|
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -140,6 +140,8 @@ if (ENABLE_NLP)
|
||||
add_contrib (libstemmer-c-cmake libstemmer_c)
|
||||
add_contrib (wordnet-blast-cmake wordnet-blast)
|
||||
add_contrib (lemmagen-c-cmake lemmagen-c)
|
||||
add_contrib (nlp-data-cmake nlp-data)
|
||||
add_contrib (cld2-cmake cld2)
|
||||
endif()
|
||||
|
||||
add_contrib (sqlite-cmake sqlite-amalgamation)
|
||||
|
1
contrib/cld2
vendored
Submodule
1
contrib/cld2
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit bc6d493a2f64ed1fc1c4c4b4294a542a04e04217
|
33
contrib/cld2-cmake/CMakeLists.txt
Normal file
33
contrib/cld2-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,33 @@
|
||||
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cld2")
|
||||
|
||||
set (SRCS
|
||||
"${LIBRARY_DIR}/internal/cldutil.cc"
|
||||
"${LIBRARY_DIR}/internal/compact_lang_det.cc"
|
||||
"${LIBRARY_DIR}/internal/cldutil_shared.cc"
|
||||
"${LIBRARY_DIR}/internal/compact_lang_det_hint_code.cc"
|
||||
"${LIBRARY_DIR}/internal/compact_lang_det_impl.cc"
|
||||
"${LIBRARY_DIR}/internal/debug.cc"
|
||||
"${LIBRARY_DIR}/internal/fixunicodevalue.cc"
|
||||
"${LIBRARY_DIR}/internal/generated_entities.cc"
|
||||
"${LIBRARY_DIR}/internal/generated_language.cc"
|
||||
"${LIBRARY_DIR}/internal/generated_ulscript.cc"
|
||||
"${LIBRARY_DIR}/internal/getonescriptspan.cc"
|
||||
"${LIBRARY_DIR}/internal/lang_script.cc"
|
||||
"${LIBRARY_DIR}/internal/offsetmap.cc"
|
||||
"${LIBRARY_DIR}/internal/scoreonescriptspan.cc"
|
||||
"${LIBRARY_DIR}/internal/tote.cc"
|
||||
"${LIBRARY_DIR}/internal/utf8statetable.cc"
|
||||
"${LIBRARY_DIR}/internal/cld_generated_cjk_uni_prop_80.cc"
|
||||
"${LIBRARY_DIR}/internal/cld2_generated_cjk_compatible.cc"
|
||||
"${LIBRARY_DIR}/internal/cld_generated_cjk_delta_bi_4.cc"
|
||||
"${LIBRARY_DIR}/internal/generated_distinct_bi_0.cc"
|
||||
"${LIBRARY_DIR}/internal/cld2_generated_quadchrome_2.cc"
|
||||
"${LIBRARY_DIR}/internal/cld2_generated_deltaoctachrome.cc"
|
||||
"${LIBRARY_DIR}/internal/cld2_generated_distinctoctachrome.cc"
|
||||
"${LIBRARY_DIR}/internal/cld_generated_score_quad_octa_2.cc"
|
||||
)
|
||||
add_library(_cld2 ${SRCS})
|
||||
set_property(TARGET _cld2 PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
target_compile_options (_cld2 PRIVATE -Wno-reserved-id-macro -Wno-c++11-narrowing)
|
||||
target_include_directories(_cld2 SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/public")
|
||||
add_library(ch_contrib::cld2 ALIAS _cld2)
|
2
contrib/lz4
vendored
2
contrib/lz4
vendored
@ -1 +1 @@
|
||||
Subproject commit f39b79fb02962a1cd880bbdecb6dffba4f754a11
|
||||
Subproject commit 4c9431e9af596af0556e5da0ae99305bafb2b10b
|
1
contrib/nlp-data
vendored
Submodule
1
contrib/nlp-data
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 5591f91f5e748cba8fb9ef81564176feae774853
|
15
contrib/nlp-data-cmake/CMakeLists.txt
Normal file
15
contrib/nlp-data-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,15 @@
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
||||
|
||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
|
||||
|
||||
add_library (_nlp_data INTERFACE)
|
||||
|
||||
clickhouse_embed_binaries(
|
||||
TARGET nlp_dictionaries
|
||||
RESOURCE_DIR "${LIBRARY_DIR}"
|
||||
RESOURCES charset.zst tonality_ru.zst programming.zst
|
||||
)
|
||||
|
||||
add_dependencies(_nlp_data nlp_dictionaries)
|
||||
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
add_library(ch_contrib::nlp_data ALIAS _nlp_data)
|
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1
|
||||
Subproject commit f9a393ed2433a60034795284f82d093b348f2102
|
2
contrib/replxx
vendored
2
contrib/replxx
vendored
@ -1 +1 @@
|
||||
Subproject commit f019cba7ea1bcd1b4feb7826f28ed57fb581b04c
|
||||
Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df
|
@ -65,7 +65,12 @@ do
|
||||
# check if variable not empty
|
||||
[ -z "$dir" ] && continue
|
||||
# ensure directories exist
|
||||
if ! mkdir -p "$dir"; then
|
||||
if [ "$DO_CHOWN" = "1" ]; then
|
||||
mkdir="mkdir"
|
||||
else
|
||||
mkdir="$gosu mkdir"
|
||||
fi
|
||||
if ! $mkdir -p "$dir"; then
|
||||
echo "Couldn't create necessary directory: $dir"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -125,10 +125,6 @@ For installing CMake and Ninja on Mac OS X first install Homebrew and then insta
|
||||
|
||||
Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/.
|
||||
|
||||
## Optional External Libraries {#optional-external-libraries}
|
||||
|
||||
ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`.
|
||||
|
||||
## C++ Compiler {#c-compiler}
|
||||
|
||||
The Clang compiler, starting from version 11, is supported for building ClickHouse.
|
||||
|
@ -78,15 +78,21 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
|
||||
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
|
||||
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
|
||||
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
|
||||
| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) |
|
||||
| TIME | [Int64](../../sql-reference/data-types/int-uint.md) |
|
||||
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
|
||||
| STRING | [String](../../sql-reference/data-types/string.md) |
|
||||
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
|
||||
| BLOB | [String](../../sql-reference/data-types/string.md) |
|
||||
| GEOMETRY | [String](../../sql-reference/data-types/string.md) |
|
||||
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
|
||||
| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) |
|
||||
| SET | [UInt64](../../sql-reference/data-types/int-uint.md) |
|
||||
|
||||
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
|
||||
|
||||
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
|
||||
|
||||
Other types are not supported. If a MySQL table contains a column of such a type, ClickHouse throws an "Unhandled data type" exception and stops replication.
|
||||
|
||||
## Specifics and Recommendations {#specifics-and-recommendations}
|
||||
|
@ -97,13 +97,16 @@ Structure of the `patterns` section:
|
||||
|
||||
``` text
|
||||
pattern
|
||||
rule_type
|
||||
regexp
|
||||
function
|
||||
pattern
|
||||
rule_type
|
||||
regexp
|
||||
age + precision
|
||||
...
|
||||
pattern
|
||||
rule_type
|
||||
regexp
|
||||
function
|
||||
age + precision
|
||||
@ -127,12 +130,20 @@ When processing a row, ClickHouse checks the rules in the `pattern` sections. Ea
|
||||
|
||||
Fields for `pattern` and `default` sections:
|
||||
|
||||
- `regexp`– A pattern for the metric name.
|
||||
- `rule_type` - a rule's type. The rule is applied only to metrics of that particular type. The engine uses it to separate plain and tagged metrics. Optional parameter. Default value: `all`.
|
||||
It is unnecessary when performance is not critical or only one metric type is used, e.g. plain metrics. By default, only one set of rules is created. Otherwise, if any of the special types is defined, two different sets are created: one for plain metrics (root.branch.leaf) and one for tagged metrics (root.branch.leaf;tag1=value1).
|
||||
The default rules end up in both sets.
|
||||
Valid values:
|
||||
- `all` (default) - a universal rule, used when `rule_type` is omitted.
|
||||
- `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression.
|
||||
- `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression.
|
||||
- `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. Sorting by tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
|
||||
- `regexp` – A pattern for the metric name (a regular expression or DSL).
|
||||
- `age` – The minimum age of the data in seconds.
|
||||
- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
|
||||
- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages.
|
||||
|
||||
### Configuration Example {#configuration-example}
|
||||
### Configuration Example without rules types {#configuration-example}
|
||||
|
||||
``` xml
|
||||
<graphite_rollup>
|
||||
@ -167,6 +178,81 @@ Fields for `pattern` and `default` sections:
|
||||
</graphite_rollup>
|
||||
```
|
||||
|
||||
### Configuration Example with rules types {#configuration-typed-example}
|
||||
|
||||
``` xml
|
||||
<graphite_rollup>
|
||||
<version_column_name>Version</version_column_name>
|
||||
<pattern>
|
||||
<rule_type>plain</rule_type>
|
||||
<regexp>click_cost</regexp>
|
||||
<function>any</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<pattern>
|
||||
<rule_type>tagged</rule_type>
|
||||
<regexp>^((.*)|.)min\?</regexp>
|
||||
<function>min</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<pattern>
|
||||
<rule_type>tagged</rule_type>
|
||||
<regexp><![CDATA[^someName\?(.*&)*tag1=value1(&|$)]]></regexp>
|
||||
<function>min</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<pattern>
|
||||
<rule_type>tag_list</rule_type>
|
||||
<regexp>someName;tag2=value2</regexp>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<default>
|
||||
<function>max</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>3600</age>
|
||||
<precision>300</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>3600</precision>
|
||||
</retention>
|
||||
</default>
|
||||
</graphite_rollup>
|
||||
```
|
||||
|
||||
|
||||
!!! warning "Warning"
|
||||
Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
|
||||
|
||||
|
@ -54,10 +54,8 @@ If the set of columns in the Buffer table does not match the set of columns in a
|
||||
If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared.
|
||||
The same thing happens if the subordinate table does not exist when the buffer is flushed.
|
||||
|
||||
If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again.
|
||||
|
||||
!!! attention "Attention"
|
||||
Running ALTER on the Buffer table in releases made before 28 Sep 2020 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
|
||||
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating it is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
|
||||
|
||||
If the server is restarted abnormally, the data in the buffer is lost.
|
||||
|
||||
|
@ -25,6 +25,7 @@ Categories:
|
||||
- **[Operations](../faq/operations/index.md)**
|
||||
- [Which ClickHouse version to use in production?](../faq/operations/production.md)
|
||||
- [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md)
|
||||
- [Does ClickHouse support multi-region replication?](../faq/operations/multi-region-replication.md)
|
||||
- **[Integration](../faq/integration/index.md)**
|
||||
- [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md)
|
||||
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md)
|
||||
|
@ -23,11 +23,13 @@ Web UI can be accessed here: `http://localhost:8123/play`.
|
||||
![Web UI](../images/play.png)
|
||||
|
||||
|
||||
In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13.
|
||||
In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay.
|
||||
|
||||
``` bash
|
||||
$ curl 'http://localhost:8123/ping'
|
||||
Ok.
|
||||
$ curl 'http://localhost:8123/replicas_status'
|
||||
Ok.
|
||||
```
|
||||
|
||||
Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries.
|
||||
|
@ -27,6 +27,7 @@ toc_title: Client Libraries
|
||||
- Go
|
||||
- [clickhouse](https://github.com/kshvakov/clickhouse/)
|
||||
- [go-clickhouse](https://github.com/roistat/go-clickhouse)
|
||||
- [chconn](https://github.com/vahid-sohrabloo/chconn)
|
||||
- [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse)
|
||||
- [golang-clickhouse](https://github.com/leprosus/golang-clickhouse)
|
||||
- Swift
|
||||
|
@ -159,8 +159,7 @@ Configuration fields:
|
||||
| Tag | Description | Required |
|
||||
|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
|
||||
| `name` | Column name. | Yes |
|
||||
| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md),
|
||||
[Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
|
||||
| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md),[Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. 
For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
|
||||
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
|
||||
| `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
|
||||
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).<br/><br/>Default value: `false`. | No |
|
||||
|
@ -252,7 +252,6 @@ CREATE TABLE codec_example
|
||||
ENGINE = MergeTree()
|
||||
```
|
||||
|
||||
|
||||
### Encryption Codecs {#create-query-encryption-codecs}
|
||||
|
||||
These codecs don't actually compress data, but instead encrypt data on disk. These are only available when an encryption key is specified by [encryption](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption) settings. Note that encryption only makes sense at the end of codec pipelines, because encrypted data usually can't be compressed in any meaningful way.
|
||||
@ -260,6 +259,7 @@ These codecs don't actually compress data, but instead encrypt data on disk. The
|
||||
Encryption codecs:
|
||||
|
||||
- `CODEC('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode.
|
||||
|
||||
- `CODEC('AES-256-GCM-SIV')` — Encrypts data with AES-256 in GCM-SIV mode.
|
||||
|
||||
These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content).
|
||||
@ -269,7 +269,7 @@ These codecs use a fixed nonce and encryption is therefore deterministic. This m
|
||||
|
||||
!!! attention "Attention"
|
||||
If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
|
@ -43,7 +43,7 @@ User host is a host from which a connection to ClickHouse server could be establ
|
||||
- `HOST ANY` — User can connect from any location. This is a default option.
|
||||
- `HOST LOCAL` — User can connect only locally.
|
||||
- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`.
|
||||
- `HOST NAME REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST NAME REGEXP '.*\.mysite\.com'`.
|
||||
- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`.
|
||||
- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain.
|
||||
|
||||
Another way of specifying host is to use `@` syntax following the username. Examples:
|
||||
|
@ -72,7 +72,7 @@ Reloads all [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-mo
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
SYSTEM RELOAD MODELS
|
||||
SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
|
||||
```
|
||||
|
||||
## RELOAD MODEL {#query_language-system-reload-model}
|
||||
@ -82,7 +82,7 @@ Completely reloads a CatBoost model `model_name` if the configuration was update
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
SYSTEM RELOAD MODEL <model_name>
|
||||
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_name>
|
||||
```
|
||||
|
||||
## RELOAD FUNCTIONS {#query_language-system-reload-functions}
|
||||
@ -92,8 +92,8 @@ Reloads all registered [executable user defined functions](../functions/index.md
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
RELOAD FUNCTIONS
|
||||
RELOAD FUNCTION function_name
|
||||
SYSTEM RELOAD FUNCTIONS [ON CLUSTER cluster_name]
|
||||
SYSTEM RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
|
||||
```
|
||||
|
||||
## DROP DNS CACHE {#query_language-system-drop-dns-cache}
|
||||
|
@ -3,14 +3,14 @@ toc_priority: 53
|
||||
toc_title: USE
|
||||
---
|
||||
|
||||
# USE 语句 {#use}
|
||||
# USE Statement {#use}
|
||||
|
||||
``` sql
|
||||
USE db
|
||||
```
|
||||
|
||||
用于设置会话的当前数据库。
|
||||
Lets you set the current database for the session.
|
||||
|
||||
如果查询语句中没有在表名前面以加点的方式指明数据库名, 则用当前数据库进行搜索。
|
||||
The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name.
|
||||
|
||||
使用 HTTP 协议时无法进行此查询,因为没有会话的概念。
|
||||
This query can’t be made when using the HTTP protocol, since there is no concept of a session.
|
||||
|
BIN
docs/ko/images/column-oriented.gif
Normal file
BIN
docs/ko/images/column-oriented.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 43 KiB |
1
docs/ko/images/logo.svg
Normal file
1
docs/ko/images/logo.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="54" height="48" markdown="1" viewBox="0 0 9 8"><style>.o{fill:#fc0}.r{fill:red}</style><path d="M0,7 h1 v1 h-1 z" class="r"/><path d="M0,0 h1 v7 h-1 z" class="o"/><path d="M2,0 h1 v8 h-1 z" class="o"/><path d="M4,0 h1 v8 h-1 z" class="o"/><path d="M6,0 h1 v8 h-1 z" class="o"/><path d="M8,3.25 h1 v1.5 h-1 z" class="o"/></svg>
|
After Width: | Height: | Size: 373 B |
BIN
docs/ko/images/play.png
Normal file
BIN
docs/ko/images/play.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
BIN
docs/ko/images/row-oriented.gif
Normal file
BIN
docs/ko/images/row-oriented.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 38 KiB |
94
docs/ko/index.md
Normal file
94
docs/ko/index.md
Normal file
@ -0,0 +1,94 @@
|
||||
---
|
||||
toc_priority: 0
|
||||
toc_title: 목차
|
||||
---
|
||||
|
||||
# ClickHouse란? {#what-is-clickhouse}
|
||||
|
||||
ClickHouse® 는 query의 온라인 분석 처리(OLAP)를 위한 열 지향(column-oriented) 데이터베이스 관리 시스템(DBMS)입니다.
|
||||
|
||||
"보통의" 행 지향(row-oriented) DBMS에서는 데이터가 다음과 같은 순서로 저장됩니다.
|
||||
|
||||
| row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|
||||
|-----|-------------|------------|--------------------|-----------|---------------------|
|
||||
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
|
||||
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
|
||||
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
|
||||
| #N | … | … | … | … | … |
|
||||
|
||||
즉, 행과 관련된 모든 값들은 물리적으로 나란히 저장됩니다.
|
||||
|
||||
행 지향(row-oriented) DBMS의 예시로는 MySQL, Postgres, 그리고 MS SQL 서버 등이 있습니다.
|
||||
|
||||
열 지향 (column-oriented) DBMS에서는 데이터가 아래와 같은 방식으로 저장됩니다:
|
||||
|
||||
| Row: | #0 | #1 | #2 | #N |
|
||||
|-------------|---------------------|---------------------|---------------------|-----|
|
||||
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
|
||||
| JavaEnable: | 1 | 0 | 1 | … |
|
||||
| Title: | Investor Relations | Contact us | Mission | … |
|
||||
| GoodEvent: | 1 | 1 | 1 | … |
|
||||
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
|
||||
|
||||
이 예에서는 데이터가 정렬된 순서만을 보여줍니다. 다른 열의 값들은 서로 분리되어 저장되고, 같은 열의 정보들은 함께 저장됩니다.
|
||||
|
||||
열 지향(column-oriented) DBMS 의 종류는 Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, 그리고 kdb+ 등이 있습니다.
|
||||
|
||||
데이터를 저장하는 순서가 다르면 적합한 시나리오도 달라집니다. 데이터 접근 시나리오란 어떤 쿼리가 얼마나 자주, 어떤 비율로 수행되는지, 각 쿼리 유형에서 읽는 데이터의 양(행, 열, 바이트)이 얼마나 되는지, 데이터 읽기와 업데이트 사이의 관계, 데이터의 작업 크기와 로컬에서 사용되는 방식, 트랜잭션의 사용 여부와 격리 수준, 데이터 복제 및 논리적 무결성에 대한 요구 사항, 각 쿼리 유형에 대한 대기 시간 및 처리량 요구 사항 등을 말합니다.
|
||||
|
||||
시스템의 부하가 높을수록 사용 시나리오의 요구 사항에 맞게 시스템 설정을 사용자 지정하는 것이 더 중요하며 이 사용자 지정은 더욱 세분화됩니다. 상당히 다른 시나리오에 똑같이 적합한 시스템은 없습니다. 만약 높은 부하에서 시스템이 넓은 시나리오 집합에 대해 적응한다면 시스템은 모든 시나리오를 모두 제대로 처리하지 못하거나 가능한 시나리오 중 하나 또는 몇 개에 대해서만 잘 작동할 것입니다.
|
||||
|
||||
## OLAP 시나리오의 중요 속성들 {#key-properties-of-olap-scenario}
|
||||
|
||||
- 요청(request)의 대부분은 읽기 접근에 관한 것입니다.
|
||||
- 데이터는 단일 행이 아니라 상당히 큰 일괄 처리(\> 1000개 행)로 업데이트됩니다. 또는 전혀 업데이트되지 않습니다.
|
||||
- 데이터는 DB에 추가되지만 수정되지는 않습니다.
|
||||
- 읽기의 경우 DB에서 상당히 많은 수의 행이 추출되지만 열은 일부만 추출됩니다.
|
||||
- 테이블은 "넓습니다". 이는 열의 수가 많다는 것을 의미합니다.
|
||||
- 쿼리는 상대적으로 드뭅니다(일반적으로 서버당 초당 수백 개 이하의 쿼리).
|
||||
- 간단한 쿼리의 경우 약 50ms의 대기 시간이 허용됩니다.
|
||||
- 열 값은 숫자와 짧은 문자열(예: URL당 60바이트)과 같이 상당히 작습니다.
|
||||
- 단일 쿼리를 처리할 때 높은 처리량이 필요합니다(서버당 초당 최대 수십억 행).
|
||||
- 트랜잭션이 필요하지 않습니다.
|
||||
- 데이터 일관성에 대한 요구 사항이 낮습니다.
|
||||
- 쿼리당 하나의 큰 테이블이 존재하고 하나를 제외한 모든 테이블은 작습니다.
|
||||
- 쿼리 결과가 원본 데이터보다 훨씬 작습니다. 즉, 데이터가 필터링되거나 집계되므로 결과가 단일 서버의 RAM에 꼭 들어맞습니다.
|
||||
|
||||
OLAP 시나리오가 다른 일반적인 시나리오(OLTP 또는 키-값 액세스와 같은)와 매우 다르다는 것을 쉽게 알 수 있습니다. 따라서 적절한 성능을 얻으려면 분석 쿼리를 처리하기 위해 OLTP 또는 키-값 DB를 사용하는 것은 의미가 없습니다. 예를 들어 분석에 MongoDB나 Redis를 사용하려고 하면 OLAP 데이터베이스에 비해 성능이 매우 저하됩니다.
|
||||
|
||||
## 왜 열 지향 데이터베이스가 OLAP 시나리오에 적합한가 {#why-column-oriented-databases-work-better-in-the-olap-scenario}
|
||||
|
||||
열 지향(column-oriented) 데이터베이스는 OLAP 시나리오에 더 적합합니다. 대부분의 쿼리를 처리하는 데 있어서 행 지향(row-oriented) 데이터베이스보다 100배 이상 빠릅니다. 그 이유는 아래에 자세히 설명되어 있지만 사실은 시각적으로 더 쉽게 설명할 수 있습니다.
|
||||
|
||||
**행 지향 DBMS**
|
||||
|
||||
![Row-oriented](images/row-oriented.gif#)
|
||||
|
||||
**열 지향 DBMS**
|
||||
|
||||
![Column-oriented](images/column-oriented.gif#)
|
||||
|
||||
차이가 보이시나요?
|
||||
|
||||
### 입출력 {#inputoutput}
|
||||
|
||||
1. 분석 쿼리의 경우 적은 수의 테이블 열만 읽어야 합니다. 열 지향 데이터베이스에서는 필요한 데이터만 읽을 수 있습니다. 예를 들어 100개 중 5개의 열이 필요한 경우 I/O가 20배 감소할 것으로 예상할 수 있습니다.
|
||||
2. 데이터는 패킷으로 읽히므로 압축하기가 더 쉽습니다. 열의 데이터도 압축하기 쉽습니다. 이것은 I/O의 볼륨을 더욱 감소시킵니다.
|
||||
3. 감소된 I/O로 인해 시스템 캐시에 더 많은 데이터가 들어갑니다.
|
||||
|
||||
예를 들어, "각 광고 플랫폼에 대한 레코드 수 계산" 쿼리는 압축되지 않은 1바이트를 차지하는 하나의 "광고 플랫폼 ID" 열을 읽어야 합니다. 트래픽의 대부분이 광고 플랫폼에서 발생하지 않은 경우 이 열의 최소 10배 압축을 기대할 수 있습니다. 빠른 압축 알고리즘을 사용하면 초당 최소 몇 기가바이트의 압축되지 않은 데이터의 속도로 데이터 압축 해제가 가능합니다. 즉, 이 쿼리는 단일 서버에서 초당 약 수십억 행의 속도로 처리될 수 있습니다. 이 속도는 정말 실제로 달성됩니다.
|
||||
|
||||
### CPU {#cpu}
|
||||
|
||||
쿼리를 수행하려면 많은 행을 처리해야 하므로 별도의 행이 아닌 전체 벡터에 대한 모든 연산을 디스패치하거나 쿼리 엔진을 구현하여 디스패치 비용이 거의 들지 않습니다. 반쯤 괜찮은 디스크 하위 시스템에서 이렇게 하지 않으면 쿼리 인터프리터가 불가피하게 CPU를 정지시킵니다. 데이터를 열에 저장하고 가능한 경우 열별로 처리하는 것이 좋습니다.
|
||||
|
||||
이를 수행하기 위한 두 가지 방법이 있습니다.
|
||||
|
||||
1. 벡터 엔진. 모든 연산은 별도의 값 대신 벡터에 대해 작성됩니다. 즉, 작업을 자주 호출할 필요가 없으며 파견 비용도 무시할 수 있습니다. 작업 코드에는 최적화된 내부 주기가 포함되어 있습니다.
|
||||
2. 코드 생성. 쿼리에 대해 생성된 코드에는 모든 간접 호출이 있습니다.
|
||||
|
||||
이것은 단순한 쿼리를 실행할 때 의미가 없기 때문에 "일반" 데이터베이스에서는 수행되지 않습니다. 그러나 예외가 있습니다. 예를 들어 MemSQL은 코드 생성을 사용하여 SQL 쿼리를 처리할 때 대기 시간을 줄입니다. (비교되게, 분석 DBMS는 대기 시간이 아닌 처리량 최적화가 필요합니다.)
|
||||
|
||||
CPU 효율성을 위해 쿼리 언어는 선언적(SQL 또는 MDX)이거나 최소한 벡터(J, K)여야 합니다. 쿼리는 최적화를 허용하는 암시적 루프만 포함해야 합니다.
|
||||
|
||||
{## [원문](https://clickhouse.com/docs/en/) ##}
|
@ -99,13 +99,16 @@ patterns
|
||||
|
||||
``` text
|
||||
pattern
|
||||
rule_type
|
||||
regexp
|
||||
function
|
||||
pattern
|
||||
rule_type
|
||||
regexp
|
||||
age + precision
|
||||
...
|
||||
pattern
|
||||
rule_type
|
||||
regexp
|
||||
function
|
||||
age + precision
|
||||
@ -129,12 +132,20 @@ default
|
||||
|
||||
Поля для разделов `pattern` и `default`:
|
||||
|
||||
- `regexp` – шаблон имени метрики.
|
||||
- `rule_type` - тип правила (применяется только к метрикам указанных типов), используется для разделения правил проверки плоских/теггированных метрик. Опциональное поле. Значение по умолчанию: `all`.
|
||||
Если используются метрики только одного типа или производительность проверки правил некритична, можно не использовать. По умолчанию создается только один тип правил для проверки. Иначе, если хотя бы для одного правила указано отличное от умолчания значение, создаются 2 независимых типа правил - для обычных (классические root.branch.leaf) и теггированных метрик (root.branch.leaf;tag1=value1).
|
||||
Правила по умолчанию попадают в наборы правил обоих типов.
|
||||
Возможные значения:
|
||||
- `all` (default) - универсальное правило, назначается также по умолчанию, если поле не задано
|
||||
- `plain` - правило для плоских метрик (без тегов). Поле `regexp` обрабатывается как регулярное выражение.
|
||||
- `tagged` - правило для теггированных метрик (метрика хранится в БД в формате `someName?tag1=value1&tag2=value2&tag3=value3`), регулярное выражение должно быть отсортировано по именам тегов, первым - значение тега `__name__`, если есть. Поле `regexp` обрабатывается как регулярное выражение.
|
||||
- `tag_list` - правило для теггированных метрик, простой DSL для упрощения задания регулярного выражения в формате тегов graphite `someName;tag1=value1;tag2=value2`, `someName` или `tag1=value1;tag2=value2`. Поле `regexp` транслируется в правило `tagged`. Сортировать по именам тегов не обязательно, оно отсортируется автоматически. Значение тега (но не имя) может быть регулярным выражением (например `env=(dev|staging)`).
|
||||
- `regexp` – шаблон имени метрики (регулярное выражение или DSL).
|
||||
- `age` – минимальный возраст данных в секундах.
|
||||
- `precision` – точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
|
||||
- `function` – имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
|
||||
|
||||
### Пример конфигурации {#configuration-example}
|
||||
### Пример конфигурации без разделения типа правил {#configuration-example}
|
||||
|
||||
``` xml
|
||||
<graphite_rollup>
|
||||
@ -169,6 +180,80 @@ default
|
||||
</graphite_rollup>
|
||||
```
|
||||
|
||||
### Пример конфигурации с разделением типа правил {#configuration-typed-example}
|
||||
|
||||
``` xml
|
||||
<graphite_rollup>
|
||||
<version_column_name>Version</version_column_name>
|
||||
<pattern>
|
||||
<rule_type>plain</rule_type>
|
||||
<regexp>click_cost</regexp>
|
||||
<function>any</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<pattern>
|
||||
<rule_type>tagged</rule_type>
|
||||
<regexp>^((.*)|.)min\?</regexp>
|
||||
<function>min</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<pattern>
|
||||
<rule_type>tagged</rule_type>
|
||||
<regexp><![CDATA[^someName\?(.*&)*tag1=value1(&|$)]]></regexp>
|
||||
<function>min</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<pattern>
|
||||
<rule_type>tag_list</rule_type>
|
||||
<regexp>someName;tag2=value2</regexp>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>5</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<default>
|
||||
<function>max</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>3600</age>
|
||||
<precision>300</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>3600</precision>
|
||||
</retention>
|
||||
</default>
|
||||
</graphite_rollup>
|
||||
```
|
||||
|
||||
|
||||
!!! warning "Внимание"
|
||||
Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
|
||||
|
@ -48,10 +48,8 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
|
||||
Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен.
|
||||
То же самое происходит, если подчинённая таблица не существует в момент сброса буфера.
|
||||
|
||||
Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а после создать таблицу Buffer заново.
|
||||
|
||||
!!! attention "Внимание"
|
||||
В релизах до 28 сентября 2020 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
|
||||
В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
|
||||
|
||||
При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны.
|
||||
|
||||
|
@ -105,7 +105,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
```xml
|
||||
<encryption_codecs>
|
||||
<aes_128_gcm_siv>
|
||||
<key_hex from_env="KEY"></key_hex>
|
||||
<key_hex from_env="ENVVAR"></key_hex>
|
||||
</aes_128_gcm_siv>
|
||||
</encryption_codecs>
|
||||
```
|
||||
@ -118,7 +118,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
<encryption_codecs>
|
||||
<aes_128_gcm_siv>
|
||||
<key_hex id="0">00112233445566778899aabbccddeeff</key_hex>
|
||||
<key_hex id="1" from_env=".."></key_hex>
|
||||
<key_hex id="1" from_env="ENVVAR"></key_hex>
|
||||
<current_key_id>1</current_key_id>
|
||||
</aes_128_gcm_siv>
|
||||
</encryption_codecs>
|
||||
|
@ -246,6 +246,46 @@ CREATE TABLE codec_example
|
||||
ENGINE = MergeTree()
|
||||
```
|
||||
|
||||
### Кодеки шифрования {#create-query-encryption-codecs}
|
||||
|
||||
Эти кодеки не сжимают данные, вместо этого они зашифровывают данные на диске. Воспользоваться кодеками можно, только когда ключ шифрования задан параметрами [шифрования](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption). Обратите внимание: ставить кодеки шифрования имеет смысл в самый конец цепочки кодеков, потому что зашифрованные данные, как правило, нельзя сжать релевантным образом.
|
||||
|
||||
Кодеки шифрования:
|
||||
|
||||
- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV.
|
||||
- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV.
|
||||
|
||||
Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит).
|
||||
|
||||
!!! attention "Внимание"
|
||||
Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст.
|
||||
|
||||
!!! attention "Внимание"
|
||||
Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование.
|
||||
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mytable
|
||||
(
|
||||
x String Codec(AES_128_GCM_SIV)
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY x;
|
||||
```
|
||||
|
||||
!!! note "Замечание"
|
||||
Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных.
|
||||
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mytable
|
||||
(
|
||||
x String Codec(Delta, LZ4, AES_128_GCM_SIV)
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY x;
|
||||
```
|
||||
|
||||
## Временные таблицы {#temporary-tables}
|
||||
|
||||
ClickHouse поддерживает временные таблицы со следующими характеристиками:
|
||||
|
@ -43,7 +43,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
|
||||
- `HOST ANY` — Пользователь может подключиться с любого хоста. Используется по умолчанию.
|
||||
- `HOST LOCAL` — Пользователь может подключиться только локально.
|
||||
- `HOST NAME 'fqdn'` — Хост задается через FQDN. Например, `HOST NAME 'mysite.com'`.
|
||||
- `HOST NAME REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST NAME REGEXP '.*\.mysite\.com'`.
|
||||
- `HOST REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST REGEXP '.*\.mysite\.com'`.
|
||||
- `HOST LIKE 'template'` — Позволяет использовать оператор [LIKE](../../functions/string-search-functions.md#function-like) для фильтрации хостов. Например, `HOST LIKE '%'` эквивалентен `HOST ANY`; `HOST LIKE '%.mysite.com'` разрешает подключение со всех хостов в домене `mysite.com`.
|
||||
|
||||
Также, чтобы задать хост, вы можете использовать `@` вместе с именем пользователя. Примеры:
|
||||
|
@ -62,7 +62,7 @@ def build_for_lang(lang, args):
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
nav=blog_nav,
|
||||
copyright='©2016–2021 ClickHouse, Inc.',
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
@ -97,10 +97,6 @@ def build_for_lang(lang, args):
|
||||
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
|
||||
f.write(rss_template.render({'config': raw_config}))
|
||||
|
||||
# TODO: AMP for blog
|
||||
# if not args.skip_amp:
|
||||
# amp.build_amp(lang, args, cfg)
|
||||
|
||||
logging.info(f'Finished building {lang} blog')
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
|
@ -1 +0,0 @@
|
||||
../../../en/faq/general/dbms-naming.md
|
17
docs/zh/faq/general/dbms-naming.md
Normal file
17
docs/zh/faq/general/dbms-naming.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
title: "\u201CClickHouse\u201D 有什么含义?"
|
||||
toc_hidden: true
|
||||
toc_priority: 10
|
||||
---
|
||||
|
||||
# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
|
||||
|
||||
它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上,ClickHouse本应该保存人们在互联网上的所有点击记录,现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
|
||||
|
||||
这个由两部分组成的意思有两个结果:
|
||||
|
||||
- 唯一正确的写“Click**H**ouse”的方式是用大写H。
|
||||
- 如果需要缩写,请使用“**CH**”。由于一些历史原因,缩写CK在中国也很流行,主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
|
||||
|
||||
!!! info "有趣的事实"
|
||||
多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。
|
@ -1 +0,0 @@
|
||||
../../../en/faq/general/index.md
|
27
docs/zh/faq/general/index.md
Normal file
27
docs/zh/faq/general/index.md
Normal file
@ -0,0 +1,27 @@
|
||||
---
|
||||
title: ClickHouse 有关常见问题
|
||||
toc_hidden_folder: true
|
||||
toc_priority: 1
|
||||
toc_title: General
|
||||
---
|
||||
|
||||
# ClickHouse 有关常见问题 {#general-questions}
|
||||
|
||||
常见问题:
|
||||
|
||||
- [什么是 ClickHouse?](../../index.md#what-is-clickhouse)
|
||||
- [为何 ClickHouse 如此迅捷?](../../faq/general/why-clickhouse-is-so-fast.md)
|
||||
- [谁在使用 ClickHouse?](../../faq/general/who-is-using-clickhouse.md)
|
||||
- [“ClickHouse” 有什么含义?](../../faq/general/dbms-naming.md)
|
||||
- [ “Не тормозит” 有什么含义?](../../faq/general/ne-tormozit.md)
|
||||
- [什么是 OLAP?](../../faq/general/olap.md)
|
||||
- [什么是列存储数据库?](../../faq/general/columnar-database.md)
|
||||
- [为何不使用 MapReduce等技术?](../../faq/general/mapreduce.md)
|
||||
- [我如何为 ClickHouse贡献代码?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
|
||||
|
||||
|
||||
|
||||
!!! info "没找到您需要的内容?"
|
||||
请查阅 [其他 F.A.Q. 类别](../../faq/index.md) 或者从左侧导航栏浏览其他文档
|
||||
|
||||
{## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##}
|
@ -1 +0,0 @@
|
||||
../../../en/faq/general/mapreduce.md
|
13
docs/zh/faq/general/mapreduce.md
Normal file
13
docs/zh/faq/general/mapreduce.md
Normal file
@ -0,0 +1,13 @@
|
||||
---
|
||||
title: 为何不使用 MapReduce等技术?
|
||||
toc_hidden: true
|
||||
toc_priority: 110
|
||||
---
|
||||
|
||||
# 为何不使用 MapReduce等技术? {#why-not-use-something-like-mapreduce}
|
||||
|
||||
我们可以将MapReduce这样的系统称为分布式计算系统,其中的reduce操作是基于分布式排序的。这个领域中最常见的开源解决方案是[Apache Hadoop](http://hadoop.apache.org)。Yandex使用其内部解决方案YT。
|
||||
|
||||
这些系统不适合用于在线查询,因为它们的延迟很大。换句话说,它们不能被用作网页界面的后端。这些类型的系统对于实时数据更新并不是很有用。如果操作的结果和所有中间结果(如果有的话)都位于单个服务器的内存中,那么分布式排序就不是执行reduce操作的最佳方式,这通常是在线查询的情况。在这种情况下,哈希表是执行reduce操作的最佳方式。优化map-reduce任务的一种常见方法是使用内存中的哈希表进行预聚合(部分reduce)。用户手动执行此优化。在运行简单的map-reduce任务时,分布式排序是导致性能下降的主要原因之一。
|
||||
|
||||
大多数MapReduce实现允许你在集群中执行任意代码。但是声明性查询语言更适合于OLAP,以便快速运行实验。例如,Hadoop有Hive和Pig。还可以考虑使用Cloudera Impala或Shark(已经过时了)来支持Spark,以及Spark SQL、Presto和Apache Drill。与专门的系统相比,运行这些任务的性能是非常不理想的,但是相对较高的延迟使得使用这些系统作为web界面的后端是不现实的。
|
@ -19,6 +19,7 @@ toc_priority: 76
|
||||
- [什么是 OLAP?](../faq/general/olap.md)
|
||||
- [什么是列存储数据库?](../faq/general/columnar-database.md)
|
||||
- [为何不使用 MapReduce等技术?](../faq/general/mapreduce.md)
|
||||
- [我如何为 ClickHouse贡献代码?](../faq/general/how-do-i-contribute-code-to-clickhouse.md)
|
||||
- **[应用案例](../faq/use-cases/index.md)**
|
||||
- [我能把 ClickHouse 作为时序数据库来使用吗?](../faq/use-cases/time-series.md)
|
||||
- [我能把 ClickHouse 作为 key-value 键值存储吗?](../faq/use-cases/key-value.md)
|
||||
|
@ -1 +0,0 @@
|
||||
../../../en/faq/use-cases/time-series.md
|
21
docs/zh/faq/use-cases/time-series.md
Normal file
21
docs/zh/faq/use-cases/time-series.md
Normal file
@ -0,0 +1,21 @@
|
||||
---
|
||||
title: 我能把 ClickHouse 当做时序数据库来使用吗?
|
||||
toc_hidden: true
|
||||
toc_priority: 101
|
||||
---
|
||||
|
||||
# 我能把 ClickHouse 当做时序数据库来使用吗? {#can-i-use-clickhouse-as-a-time-series-database}
|
||||
|
||||
ClickHouse是一个面向[OLAP](../../faq/general/olap.md)工作负载的通用数据存储解决方案,而市面上也有许多专门的时间序列数据库管理系统。然而,ClickHouse[专注于查询执行速度](../../faq/general/why-clickhouse-is-so-fast.md),这使得它在许多情况下的性能优于专门的系统。关于这个话题有很多独立的基准测试,所以我们不打算在这里进行论述。相反,让我们将重点放在那些当您的用例是时间序列时值得关注的ClickHouse功能上。
|
||||
|
||||
|
||||
|
||||
首先,有 **[专用编解码器(specialized codecs)](../../sql-reference/statements/create/table.md#create-query-specialized-codecs)**,它们非常适合典型的时间序列数据:既包括“DoubleDelta”和“Gorilla”等常见算法,也包括ClickHouse特有的“T64”。
|
||||
|
||||
|
||||
|
||||
其次,时间序列查询通常只访问最近的数据,比如一天或一周以前的数据。使用具有快速NVMe/SSD驱动器和高容量HDD驱动器的服务器是有意义的。ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)特性允许配置在快速硬盘上保持新鲜的热数据,并随着数据的老化逐渐移动到较慢的硬盘上。如果您的需求需要,也可以汇总或删除更旧的数据。
|
||||
|
||||
|
||||
|
||||
尽管这与ClickHouse存储和处理原始数据的理念相违背,但你可以使用[materialized views](../../sql-reference/statements/create/view.md)来适应更紧迫的延迟或成本需求。
|
@ -18,6 +18,17 @@ $ curl 'http://localhost:8123/'
|
||||
Ok.
|
||||
```
|
||||
|
||||
Web UI 可以通过这个地址访问: `http://localhost:8123/play`.
|
||||
在运行状况检查脚本中,使用`GET /ping`请求。这个处理方法总是返回 "Ok"(以换行结尾)。可从18.12.13版获得。另请参见 `/replicas_status`,它用于检查复制集的延迟。
|
||||
|
||||
|
||||
``` bash
|
||||
$ curl 'http://localhost:8123/ping'
|
||||
Ok.
|
||||
$ curl 'http://localhost:8123/replicas_status'
|
||||
Ok.
|
||||
```
|
||||
|
||||
通过URL中的 `query` 参数来发送请求,或者发送POST请求,或者将查询的开头部分放在URL的`query`参数中,其他部分放在POST中(我们会在后面解释为什么这样做是有必要的)。URL的大小会限制在16KB,所以发送大型查询时要时刻记住这点。
|
||||
|
||||
如果请求成功,将会收到200的响应状态码和响应主体中的结果。
|
||||
|
@ -1,59 +1,59 @@
|
||||
---
|
||||
toc_priority: 44
|
||||
toc_title: "要求"
|
||||
toc_title: "必备条件"
|
||||
---
|
||||
|
||||
# 要求 {#requirements}
|
||||
# 必备条件 {#requirements}
|
||||
|
||||
## CPU {#cpu}
|
||||
|
||||
对于从预构建的deb包进行安装,请使用具有x86_64架构并支持SSE4.2指令的CPU。 要使用不支持SSE4.2或具有AArch64或PowerPC64LE体系结构的处理器运行ClickHouse,您应该从源代码构建ClickHouse。
|
||||
如果您使用预编译的DEB/RPM包安装ClickHouse,请使用支持SSE4.2指令集的x86_64架构的CPU。如果需要在不支持SSE4.2指令集的CPU上,或者在AArch64(ARM)和PowerPC64LE(IBM Power)架构上运行ClickHouse,您应该从源码编译ClickHouse。
|
||||
|
||||
ClickHouse实现并行数据处理并使用所有可用的硬件资源。 在选择处理器时,考虑到ClickHouse在具有大量内核但时钟速率较低的配置中的工作效率要高于具有较少内核和较高时钟速率的配置。 例如,具有2600MHz的16核心优于具有3600MHz的8核心。
|
||||
ClickHouse实现了并行数据处理,处理时会使用所有的可用资源。在选择处理器时,请注意:ClickHouse在具有大量计算核、时钟频率稍低的平台上比计算核少、时钟频率高的平台上效率更高。例如,ClickHouse在16核 2.6GHz的CPU上运行速度高于8核 3.6GHz的CPU。
|
||||
|
||||
建议使用 **睿频加速** 和 **超线程** 技术。 它显着提高了典型工作负载的性能。
|
||||
建议使用 **睿频加速** 和 **超线程** 技术。 它显著提高了正常工作负载的性能。
|
||||
|
||||
## RAM {#ram}
|
||||
|
||||
我们建议使用至少4GB的RAM来执行重要的查询。 ClickHouse服务器可以使用少得多的RAM运行,但它需要处理查询的内存。
|
||||
我们建议使用至少4GB的内存来执行重要的查询。 ClickHouse服务器可以使用很少的内存运行,但它需要一定量的内存用于处理查询。
|
||||
|
||||
RAM所需的体积取决于:
|
||||
ClickHouse所需内存取决于:
|
||||
|
||||
- 查询的复杂性。
|
||||
- 查询中处理的数据量。
|
||||
- 查询的复杂程度。
|
||||
- 查询处理的数据量。
|
||||
|
||||
要计算所需的RAM体积,您应该估计临时数据的大小 [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) 和您使用的其他操作。
|
||||
要计算所需的内存大小,您应该考虑用于[GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause)、[DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct)、[JOIN](../sql-reference/statements/select/join.md#select-join) 和其他操作所需的临时数据量。
|
||||
|
||||
ClickHouse可以使用外部存储器来存储临时数据。看 [在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) 有关详细信息。
|
||||
ClickHouse可以使用外部存储器来存储临时数据。详情请见[在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory)。
|
||||
|
||||
## 交换文件 {#swap-file}
|
||||
|
||||
禁用生产环境的交换文件。
|
||||
请在生产环境禁用交换文件。
|
||||
|
||||
## 存储子系统 {#storage-subsystem}
|
||||
|
||||
您需要有2GB的可用磁盘空间来安装ClickHouse。
|
||||
|
||||
数据所需的存储量应单独计算。 评估应包括:
|
||||
数据所需的存储空间应单独计算。预估存储容量时请考虑:
|
||||
|
||||
- 估计数据量。
|
||||
- 数据量
|
||||
|
||||
您可以采取数据的样本并从中获取行的平均大小。 然后将该值乘以计划存储的行数。
|
||||
您可以对数据进行采样并计算每行的平均占用空间。然后将该值乘以计划存储的行数。
|
||||
|
||||
- 数据压缩系数。
|
||||
- 数据压缩比
|
||||
|
||||
要估计数据压缩系数,请将数据的样本加载到ClickHouse中,并将数据的实际大小与存储的表的大小进行比较。 例如,点击流数据通常被压缩6-10倍。
|
||||
要计算数据压缩比,请将样本数据写入ClickHouse,并将原始数据大小与ClickHouse实际存储的数据进行比较。例如,用户点击行为的原始数据压缩比通常为6-10。
|
||||
|
||||
要计算要存储的最终数据量,请将压缩系数应用于估计的数据量。 如果计划将数据存储在多个副本中,则将估计的量乘以副本数。
|
||||
请将原始数据的大小除以压缩比来获得实际所需存储的大小。如果您打算将数据存放于几个副本中,请将存储容量乘上副本数。
|
||||
|
||||
## 网络 {#network}
|
||||
|
||||
如果可能的话,使用10G或更高级别的网络。
|
||||
如果可能的话,请使用10G或更高级别的网络。
|
||||
|
||||
网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。 此外,网络速度会影响复制过程。
|
||||
网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。此外,网络速度会影响复制过程。
|
||||
|
||||
## 软件 {#software}
|
||||
|
||||
ClickHouse主要是为Linux系列操作系统开发的。 推荐的Linux发行版是Ubuntu。 `tzdata` 软件包应安装在系统中。
|
||||
ClickHouse主要是为Linux系列操作系统开发的。推荐的Linux发行版是Ubuntu。您需要检查`tzdata`(对于Ubuntu)软件包是否在安装ClickHouse之前已经安装。
|
||||
|
||||
ClickHouse也可以在其他操作系统系列中工作。 查看详细信息 [开始](../getting-started/index.md) 文档的部分。
|
||||
ClickHouse也可以在其他操作系统系列中工作。详情请查看[开始](../getting-started/index.md)。
|
||||
|
@ -1,23 +1,74 @@
|
||||
---
|
||||
toc_hidden_folder: true
|
||||
toc_priority: 42
|
||||
toc_title: INDEX
|
||||
toc_priority: 35
|
||||
toc_title: ALTER
|
||||
---
|
||||
|
||||
# 操作数据跳过索引 {#manipulations-with-data-skipping-indices}
|
||||
## ALTER {#query_language_queries_alter}
|
||||
|
||||
可以使用以下操作:
|
||||
大多数 `ALTER TABLE` 查询修改表设置或数据:
|
||||
|
||||
- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 向表元数据添加索引描述。
|
||||
- [COLUMN](../../../sql-reference/statements/alter/column.md)
|
||||
- [PARTITION](../../../sql-reference/statements/alter/partition.md)
|
||||
- [DELETE](../../../sql-reference/statements/alter/delete.md)
|
||||
- [UPDATE](../../../sql-reference/statements/alter/update.md)
|
||||
- [ORDER BY](../../../sql-reference/statements/alter/order-by.md)
|
||||
- [INDEX](../../../sql-reference/statements/alter/index/index.md)
|
||||
- [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md)
|
||||
- [TTL](../../../sql-reference/statements/alter/ttl.md)
|
||||
|
||||
- `ALTER TABLE [db].name DROP INDEX name` - 从表元数据中删除索引描述并从磁盘中删除索引文件。
|
||||
!!! note "备注"
|
||||
大多数 `ALTER TABLE` 查询只支持[\*MergeTree](../../../engines/table-engines/mergetree-family/index.md)表,以及[Merge](../../../engines/table-engines/special/merge.md)和[Distributed](../../../engines/table-engines/special/distributed.md)。
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name` - 查询在分区`partition_name`中重建二级索引`name`。 操作为[mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
这些 `ALTER` 语句操作视图:
|
||||
|
||||
前两个命令是轻量级的,它们只更改元数据或删除文件。
|
||||
- [ALTER TABLE ... MODIFY QUERY](../../../sql-reference/statements/alter/view.md) — 修改一个 [Materialized view](../create/view.md#materialized) 结构.
|
||||
- [ALTER LIVE VIEW](../../../sql-reference/statements/alter/view.md#alter-live-view) — 刷新一个 [Live view](../create/view.md#live-view).
|
||||
|
||||
Also, they are replicated, syncing indices metadata via ZooKeeper.
|
||||
此外,它们会被复制,会通过ZooKeeper同步索引元数据。
|
||||
这些 `ALTER` 语句修改与基于角色的访问控制相关的实体:
|
||||
|
||||
!!! note "注意"
|
||||
索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../engines/table-engines/mergetree-family/replication.md)).
|
||||
- [USER](../../../sql-reference/statements/alter/user.md)
|
||||
- [ROLE](../../../sql-reference/statements/alter/role.md)
|
||||
- [QUOTA](../../../sql-reference/statements/alter/quota.md)
|
||||
- [ROW POLICY](../../../sql-reference/statements/alter/row-policy.md)
|
||||
- [SETTINGS PROFILE](../../../sql-reference/statements/alter/settings-profile.md)
|
||||
|
||||
[ALTER TABLE ... MODIFY COMMENT](../../../sql-reference/statements/alter/comment.md) 语句添加、修改或删除表中的注释,无论之前是否设置过。
|
||||
|
||||
## Mutations 突变 {#mutations}
|
||||
|
||||
用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。
|
||||
|
||||
|
||||
|
||||
对于 `*MergeTree` 表,通过重写整个数据部分来执行突变。没有原子性——一旦突变的部件准备好,部件就会被替换,并且在突变期间开始执行的 `SELECT` 查询将看到来自已经突变的部件的数据,以及来自尚未突变的部件的数据。
|
||||
|
||||
|
||||
|
||||
突变完全按照它们的产生顺序排列,并按此顺序应用于每个部分。突变还与“INSERT INTO”查询进行部分排序:在提交突变之前插入表中的数据将被突变,而在此之后插入的数据将不会被突变。注意,突变不会以任何方式阻止插入。
|
||||
|
||||
|
||||
|
||||
突变查询在添加突变条目后立即返回(对于复制表,条目添加到ZooKeeper;对于非复制表,条目添加到文件系统)。突变本身使用系统配置文件设置异步执行。要跟踪突变的进度,可以使用[`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) 表。成功提交的突变将继续执行,即使ClickHouse服务器重新启动。突变一旦提交就无法回滚,但如果突变卡住了,可以使用[`KILL MUTATION`](../../../sql-reference/statements/misc.md#kill-mutation) 查询将其取消。
|
||||
|
||||
|
||||
|
||||
完成突变的条目不会立即删除(保留条目的数量由 `finished_mutations_to_keep` 存储引擎参数决定)。更旧的突变条目会被删除。
|
||||
|
||||
## ALTER 查询的同步性 {#synchronicity-of-alter-queries}
|
||||
|
||||
|
||||
对于非复制表,所有的 `ALTER` 查询都是同步执行的。对于复制表,查询只是向“ZooKeeper”添加相应动作的指令,动作本身会尽快执行。但是,查询可以等待所有副本上的这些操作完成。
|
||||
|
||||
对于所有的“ALTER”查询,您可以使用[replication_alter_partitions_sync](../../../operations/settings/settings.md#replication-alter-partitions-sync)设置等待。
|
||||
|
||||
通过[replication_wait_for_inactive_replica_timeout](../../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout)设置,可以指定不活动的副本执行所有 `ALTER` 查询的等待时间(以秒为单位)。
|
||||
|
||||
|
||||
|
||||
!!! info "备注"
|
||||
|
||||
对于所有的 `ALTER` 查询,如果 `replication_alter_partitions_sync = 2`,并且某些副本处于非活动状态的时间超过了 `replication_wait_for_inactive_replica_timeout` 设置所指定的时间,那么将抛出一个异常 `UNFINISHED`。
|
||||
|
||||
|
||||
|
||||
对于 `ALTER TABLE ... UPDATE|DELETE` 查询,其同步性由 [mutations_sync](../../../operations/settings/settings.md#mutations_sync) 设置决定。
|
||||
|
@ -1 +0,0 @@
|
||||
../../../../../en/sql-reference/statements/alter/index/index.md
|
23
docs/zh/sql-reference/statements/alter/index/index.md
Normal file
23
docs/zh/sql-reference/statements/alter/index/index.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
toc_hidden_folder: true
|
||||
toc_priority: 42
|
||||
toc_title: INDEX
|
||||
---
|
||||
|
||||
# 操作数据跳过索引 {#manipulations-with-data-skipping-indices}
|
||||
|
||||
可以使用以下操作:
|
||||
|
||||
- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 向表元数据添加索引描述。
|
||||
|
||||
- `ALTER TABLE [db].name DROP INDEX name` - 从表元数据中删除索引描述并从磁盘中删除索引文件。
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name` - 查询在分区`partition_name`中重建二级索引`name`。 操作为[mutation](../../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
前两个命令是轻量级的,它们只更改元数据或删除文件。
|
||||
|
||||
Also, they are replicated, syncing indices metadata via ZooKeeper.
|
||||
此外,它们会被复制,会通过ZooKeeper同步索引元数据。
|
||||
|
||||
!!! note "注意"
|
||||
索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../../engines/table-engines/mergetree-family/replication.md)).
|
@ -1 +0,0 @@
|
||||
../../../en/sql-reference/statements/exists.md
|
12
docs/zh/sql-reference/statements/exists.md
Normal file
12
docs/zh/sql-reference/statements/exists.md
Normal file
@ -0,0 +1,12 @@
|
||||
---
|
||||
toc_priority: 45
|
||||
toc_title: EXISTS
|
||||
---
|
||||
|
||||
# EXISTS 语句 {#exists-statement}
|
||||
|
||||
``` sql
|
||||
EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
返回一个单独的 `UInt8`类型的列,如果表或数据库不存在,则包含一个值 `0`,如果表在指定的数据库中存在,则包含一个值 `1`。
|
@ -1 +0,0 @@
|
||||
../../../en/sql-reference/statements/set.md
|
23
docs/zh/sql-reference/statements/set.md
Normal file
23
docs/zh/sql-reference/statements/set.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
toc_priority: 50
|
||||
toc_title: SET
|
||||
---
|
||||
|
||||
# SET 语句 {#query-set}
|
||||
|
||||
``` sql
|
||||
SET param = value
|
||||
```
|
||||
|
||||
给当前会话的 `param` [配置项](../../operations/settings/index.md)赋值。你不能用这样的方式修改[服务器相关设置](../../operations/server-configuration-parameters/index.md)。
|
||||
|
||||
|
||||
您还可以在单个查询中设置指定设置配置文件中的所有值。
|
||||
|
||||
|
||||
|
||||
``` sql
|
||||
SET profile = 'profile-name-from-the-settings-file'
|
||||
```
|
||||
|
||||
更多详情, 详见 [配置项](../../operations/settings/settings.md).
|
@ -1 +0,0 @@
|
||||
../../../en/sql-reference/statements/truncate.md
|
31
docs/zh/sql-reference/statements/truncate.md
Normal file
31
docs/zh/sql-reference/statements/truncate.md
Normal file
@ -0,0 +1,31 @@
|
||||
---
|
||||
toc_priority: 52
|
||||
toc_title: TRUNCATE
|
||||
---
|
||||
|
||||
# TRUNCATE 语句 {#truncate-statement}
|
||||
|
||||
``` sql
|
||||
TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
删除表中的所有数据。当省略子句 `IF EXISTS` 时,如果表不存在,则查询返回一个错误。
|
||||
|
||||
|
||||
|
||||
`TRUNCATE` 查询不支持[View](../../engines/table-engines/special/view.md),[File](../../engines/table-engines/special/file.md), [URL](../../engines/table-engines/special/url.md), [Buffer](../../engines/table-engines/special/buffer.md) 和 [Null](../../engines/table-engines/special/null.md)表引擎。
|
||||
|
||||
|
||||
|
||||
可以使用 replication_alter_partitions_sync 设置在复制集上等待执行的操作。
|
||||
|
||||
|
||||
|
||||
通过 replication_wait_for_inactive_replica_timeout 设置,可以指定不活动副本执行 `TRUNCATE`查询需要等待多长时间(以秒为单位)。
|
||||
|
||||
|
||||
|
||||
!!! info "注意"
|
||||
如果`replication_alter_partitions_sync` 被设置为`2`,并且某些复制集超过 `replication_wait_for_inactive_replica_timeout`设置的时间不激活,那么将抛出一个异常`UNFINISHED`。
|
||||
|
||||
|
@ -1 +0,0 @@
|
||||
../../../en/sql-reference/statements/use.md
|
16
docs/zh/sql-reference/statements/use.md
Normal file
16
docs/zh/sql-reference/statements/use.md
Normal file
@ -0,0 +1,16 @@
|
||||
---
|
||||
toc_priority: 53
|
||||
toc_title: USE
|
||||
---
|
||||
|
||||
# USE 语句 {#use}
|
||||
|
||||
``` sql
|
||||
USE db
|
||||
```
|
||||
|
||||
用于设置会话的当前数据库。
|
||||
|
||||
如果查询语句中没有在表名前面以加点的方式指明数据库名, 则用当前数据库进行搜索。
|
||||
|
||||
使用 HTTP 协议时无法进行此查询,因为没有会话的概念。
|
@ -364,7 +364,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
"clickhouse-git-import",
|
||||
"clickhouse-compressor",
|
||||
"clickhouse-format",
|
||||
"clickhouse-extract-from-config"
|
||||
"clickhouse-extract-from-config",
|
||||
"clickhouse-keeper",
|
||||
"clickhouse-keeper-converter",
|
||||
};
|
||||
|
||||
for (const auto & tool : tools)
|
||||
|
@ -330,8 +330,6 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
|
||||
|
||||
DB::ServerUUID::load(path + "/uuid", log);
|
||||
|
||||
const Settings & settings = global_context->getSettingsRef();
|
||||
|
||||
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
|
||||
|
||||
GlobalThreadPool::initialize(
|
||||
@ -377,8 +375,8 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
Poco::Net::ServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port);
|
||||
socket.setReceiveTimeout(settings.receive_timeout);
|
||||
socket.setSendTimeout(settings.send_timeout);
|
||||
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
|
||||
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
|
||||
servers->emplace_back(
|
||||
listen_host,
|
||||
port_name,
|
||||
@ -393,8 +391,8 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
|
||||
#if USE_SSL
|
||||
Poco::Net::SecureServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
|
||||
socket.setReceiveTimeout(settings.receive_timeout);
|
||||
socket.setSendTimeout(settings.send_timeout);
|
||||
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
|
||||
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
|
||||
servers->emplace_back(
|
||||
listen_host,
|
||||
secure_port_name,
|
||||
|
@ -327,6 +327,7 @@ std::string LocalServer::getInitialCreateTableQuery()
|
||||
{
|
||||
/// Use Unix tools stdin naming convention
|
||||
table_file = "stdin";
|
||||
format_from_file_name = FormatFactory::instance().getFormatFromFileDescriptor(STDIN_FILENO);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -194,6 +194,7 @@ namespace
|
||||
{
|
||||
|
||||
void setupTmpPath(Poco::Logger * log, const std::string & path)
|
||||
try
|
||||
{
|
||||
LOG_DEBUG(log, "Setting up {} to store temporary data in it", path);
|
||||
|
||||
@ -212,6 +213,15 @@ void setupTmpPath(Poco::Logger * log, const std::string & path)
|
||||
LOG_DEBUG(log, "Skipped file in temporary path {}", it->path().string());
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
DB::tryLogCurrentException(
|
||||
log,
|
||||
fmt::format(
|
||||
"Caught exception while setup temporary path: {}. It is ok to skip this exception as cleaning old temporary files is not "
|
||||
"necessary",
|
||||
path));
|
||||
}
|
||||
|
||||
int waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
|
||||
{
|
||||
@ -967,6 +977,83 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
},
|
||||
/* already_loaded = */ false); /// Reload it right now (initial loading)
|
||||
|
||||
const auto listen_hosts = getListenHosts(config());
|
||||
const auto listen_try = getListenTry(config());
|
||||
|
||||
if (config().has("keeper_server"))
|
||||
{
|
||||
#if USE_NURAFT
|
||||
//// If we don't have configured connection probably someone trying to use clickhouse-server instead
|
||||
//// of clickhouse-keeper, so start synchronously.
|
||||
bool can_initialize_keeper_async = false;
|
||||
|
||||
if (has_zookeeper) /// We have configured connection to some zookeeper cluster
|
||||
{
|
||||
/// If we cannot connect to some other node from our cluster then we have to wait our Keeper start
|
||||
/// synchronously.
|
||||
can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster();
|
||||
}
|
||||
/// Initialize keeper RAFT.
|
||||
global_context->initializeKeeperDispatcher(can_initialize_keeper_async);
|
||||
FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher());
|
||||
|
||||
for (const auto & listen_host : listen_hosts)
|
||||
{
|
||||
/// TCP Keeper
|
||||
const char * port_name = "keeper_server.tcp_port";
|
||||
createServer(
|
||||
config(), listen_host, port_name, listen_try, /* start_server: */ false,
|
||||
servers_to_start_before_tables,
|
||||
[&](UInt16 port) -> ProtocolServerAdapter
|
||||
{
|
||||
Poco::Net::ServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port);
|
||||
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
|
||||
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
|
||||
return ProtocolServerAdapter(
|
||||
listen_host,
|
||||
port_name,
|
||||
"Keeper (tcp): " + address.toString(),
|
||||
std::make_unique<TCPServer>(
|
||||
new KeeperTCPHandlerFactory(*this, false), server_pool, socket));
|
||||
});
|
||||
|
||||
const char * secure_port_name = "keeper_server.tcp_port_secure";
|
||||
createServer(
|
||||
config(), listen_host, secure_port_name, listen_try, /* start_server: */ false,
|
||||
servers_to_start_before_tables,
|
||||
[&](UInt16 port) -> ProtocolServerAdapter
|
||||
{
|
||||
#if USE_SSL
|
||||
Poco::Net::SecureServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
|
||||
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
|
||||
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
|
||||
return ProtocolServerAdapter(
|
||||
listen_host,
|
||||
secure_port_name,
|
||||
"Keeper with secure protocol (tcp_secure): " + address.toString(),
|
||||
std::make_unique<TCPServer>(
|
||||
new KeeperTCPHandlerFactory(*this, true), server_pool, socket));
|
||||
#else
|
||||
UNUSED(port);
|
||||
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
|
||||
ErrorCodes::SUPPORT_IS_DISABLED};
|
||||
#endif
|
||||
});
|
||||
}
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
for (auto & server : servers_to_start_before_tables)
|
||||
{
|
||||
server.start();
|
||||
LOG_INFO(log, "Listening for {}", server.getDescription());
|
||||
}
|
||||
|
||||
auto & access_control = global_context->getAccessControl();
|
||||
if (config().has("custom_settings_prefixes"))
|
||||
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
|
||||
@ -1075,83 +1162,6 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
|
||||
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
|
||||
|
||||
const auto listen_hosts = getListenHosts(config());
|
||||
const auto listen_try = getListenTry(config());
|
||||
|
||||
if (config().has("keeper_server"))
|
||||
{
|
||||
#if USE_NURAFT
|
||||
//// If we don't have configured connection probably someone trying to use clickhouse-server instead
|
||||
//// of clickhouse-keeper, so start synchronously.
|
||||
bool can_initialize_keeper_async = false;
|
||||
|
||||
if (has_zookeeper) /// We have configured connection to some zookeeper cluster
|
||||
{
|
||||
/// If we cannot connect to some other node from our cluster then we have to wait our Keeper start
|
||||
/// synchronously.
|
||||
can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster();
|
||||
}
|
||||
/// Initialize keeper RAFT.
|
||||
global_context->initializeKeeperDispatcher(can_initialize_keeper_async);
|
||||
FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher());
|
||||
|
||||
for (const auto & listen_host : listen_hosts)
|
||||
{
|
||||
/// TCP Keeper
|
||||
const char * port_name = "keeper_server.tcp_port";
|
||||
createServer(
|
||||
config(), listen_host, port_name, listen_try, /* start_server: */ false,
|
||||
servers_to_start_before_tables,
|
||||
[&](UInt16 port) -> ProtocolServerAdapter
|
||||
{
|
||||
Poco::Net::ServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port);
|
||||
socket.setReceiveTimeout(settings.receive_timeout);
|
||||
socket.setSendTimeout(settings.send_timeout);
|
||||
return ProtocolServerAdapter(
|
||||
listen_host,
|
||||
port_name,
|
||||
"Keeper (tcp): " + address.toString(),
|
||||
std::make_unique<TCPServer>(
|
||||
new KeeperTCPHandlerFactory(*this, false), server_pool, socket));
|
||||
});
|
||||
|
||||
const char * secure_port_name = "keeper_server.tcp_port_secure";
|
||||
createServer(
|
||||
config(), listen_host, secure_port_name, listen_try, /* start_server: */ false,
|
||||
servers_to_start_before_tables,
|
||||
[&](UInt16 port) -> ProtocolServerAdapter
|
||||
{
|
||||
#if USE_SSL
|
||||
Poco::Net::SecureServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
|
||||
socket.setReceiveTimeout(settings.receive_timeout);
|
||||
socket.setSendTimeout(settings.send_timeout);
|
||||
return ProtocolServerAdapter(
|
||||
listen_host,
|
||||
secure_port_name,
|
||||
"Keeper with secure protocol (tcp_secure): " + address.toString(),
|
||||
std::make_unique<TCPServer>(
|
||||
new KeeperTCPHandlerFactory(*this, true), server_pool, socket));
|
||||
#else
|
||||
UNUSED(port);
|
||||
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
|
||||
ErrorCodes::SUPPORT_IS_DISABLED};
|
||||
#endif
|
||||
});
|
||||
}
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
for (auto & server : servers_to_start_before_tables)
|
||||
{
|
||||
server.start();
|
||||
LOG_INFO(log, "Listening for {}", server.getDescription());
|
||||
}
|
||||
|
||||
SCOPE_EXIT({
|
||||
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
|
||||
/// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart.
|
||||
|
@ -145,14 +145,14 @@ enum class AccessType
|
||||
M(SYSTEM_RELOAD_EMBEDDED_DICTIONARIES, "RELOAD EMBEDDED DICTIONARIES", GLOBAL, SYSTEM_RELOAD) /* implicitly enabled by the grant SYSTEM_RELOAD_DICTIONARY ON *.* */\
|
||||
M(SYSTEM_RELOAD, "", GROUP, SYSTEM) \
|
||||
M(SYSTEM_RESTART_DISK, "SYSTEM RESTART DISK", GLOBAL, SYSTEM) \
|
||||
M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP_MERGES, START MERGES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP MERGES, START MERGES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_TTL_MERGES, "SYSTEM STOP TTL MERGES, SYSTEM START TTL MERGES, STOP TTL MERGES, START TTL MERGES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \
|
||||
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP_REPLICATED_SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
|
||||
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
|
||||
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \
|
||||
M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP_REPLICATION_QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \
|
||||
M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
|
||||
M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <base/sort.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/HashTable/SmallTable.h>
|
||||
@ -557,7 +558,7 @@ public:
|
||||
}
|
||||
if (limit < answer.size())
|
||||
{
|
||||
std::nth_element(answer.begin(), answer.begin() + limit, answer.end());
|
||||
::nth_element(answer.begin(), answer.begin() + limit, answer.end());
|
||||
answer.resize(limit);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
@ -72,7 +74,7 @@ private:
|
||||
private:
|
||||
void sort()
|
||||
{
|
||||
std::sort(points, points + size,
|
||||
::sort(points, points + size,
|
||||
[](const WeightedValue & first, const WeightedValue & second)
|
||||
{
|
||||
return first.mean < second.mean;
|
||||
|
@ -72,7 +72,7 @@ private:
|
||||
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
|
||||
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>;
|
||||
|
||||
inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) const
|
||||
inline bool singleFilter(const IColumn ** columns, size_t row_num) const
|
||||
{
|
||||
const IColumn * filter_column = columns[num_arguments - 1];
|
||||
|
||||
@ -112,7 +112,7 @@ public:
|
||||
{
|
||||
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
|
||||
const IColumn * nested_column = &column->getNestedColumn();
|
||||
if (!column->isNullAt(row_num) && singleFilter(columns, row_num, num_arguments))
|
||||
if (!column->isNullAt(row_num) && singleFilter(columns, row_num))
|
||||
{
|
||||
this->setFlag(place);
|
||||
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionNull.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
@ -7,6 +9,7 @@
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <base/arithmeticOverflow.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
@ -14,8 +17,6 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -67,7 +68,7 @@ struct AggregateFunctionIntervalLengthSumData
|
||||
/// either sort whole container or do so partially merging ranges afterwards
|
||||
if (!sorted && !other.sorted)
|
||||
{
|
||||
std::sort(std::begin(segments), std::end(segments));
|
||||
::sort(std::begin(segments), std::end(segments));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -76,10 +77,10 @@ struct AggregateFunctionIntervalLengthSumData
|
||||
const auto end = std::end(segments);
|
||||
|
||||
if (!sorted)
|
||||
std::sort(begin, middle);
|
||||
::sort(begin, middle);
|
||||
|
||||
if (!other.sorted)
|
||||
std::sort(middle, end);
|
||||
::sort(middle, end);
|
||||
|
||||
std::inplace_merge(begin, middle, end);
|
||||
}
|
||||
@ -91,7 +92,7 @@ struct AggregateFunctionIntervalLengthSumData
|
||||
{
|
||||
if (!sorted)
|
||||
{
|
||||
std::sort(std::begin(segments), std::end(segments));
|
||||
::sort(std::begin(segments), std::end(segments));
|
||||
sorted = true;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <base/sort.h>
|
||||
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
@ -226,7 +227,7 @@ public:
|
||||
{
|
||||
keys.push_back(it.first);
|
||||
}
|
||||
std::sort(keys.begin(), keys.end());
|
||||
::sort(keys.begin(), keys.end());
|
||||
|
||||
// insert using sorted keys to result column
|
||||
for (auto & key : keys)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/logger_useful.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
@ -142,7 +143,7 @@ public:
|
||||
auto & array = this->data(place).value;
|
||||
|
||||
/// Sort by position; for equal position, sort by weight to get deterministic result.
|
||||
std::sort(array.begin(), array.end());
|
||||
::sort(array.begin(), array.end());
|
||||
|
||||
for (const auto & point_weight : array)
|
||||
{
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <base/range.h>
|
||||
#include <base/sort.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -76,7 +77,7 @@ struct AggregateFunctionSequenceMatchData final
|
||||
{
|
||||
if (!sorted)
|
||||
{
|
||||
std::sort(std::begin(events_list), std::end(events_list), Comparator{});
|
||||
::sort(std::begin(events_list), std::end(events_list), Comparator{});
|
||||
sorted = true;
|
||||
}
|
||||
}
|
||||
|
@ -17,15 +17,11 @@ class AggregateFunctionSimpleState final : public IAggregateFunctionHelper<Aggre
|
||||
{
|
||||
private:
|
||||
AggregateFunctionPtr nested_func;
|
||||
DataTypes arguments;
|
||||
Array params;
|
||||
|
||||
public:
|
||||
AggregateFunctionSimpleState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_)
|
||||
: IAggregateFunctionHelper<AggregateFunctionSimpleState>(arguments_, params_)
|
||||
, nested_func(nested_)
|
||||
, arguments(arguments_)
|
||||
, params(params_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -35,18 +31,19 @@ public:
|
||||
{
|
||||
DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_func);
|
||||
|
||||
// Need to make a clone because it'll be customized.
|
||||
auto storage_type = DataTypeFactory::instance().get(nested_func->getReturnType()->getName());
|
||||
|
||||
// Need to make a clone to avoid recursive reference.
|
||||
auto storage_type_out = DataTypeFactory::instance().get(nested_func->getReturnType()->getName());
|
||||
// Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type.
|
||||
AggregateFunctionProperties properties;
|
||||
auto function
|
||||
= AggregateFunctionFactory::instance().get(nested_func->getName(), {storage_type}, nested_func->getParameters(), properties);
|
||||
= AggregateFunctionFactory::instance().get(nested_func->getName(), {storage_type_out}, nested_func->getParameters(), properties);
|
||||
|
||||
// Need to make a clone because it'll be customized.
|
||||
auto storage_type_arg = DataTypeFactory::instance().get(nested_func->getReturnType()->getName());
|
||||
DataTypeCustomNamePtr custom_name
|
||||
= std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, DataTypes{nested_func->getReturnType()}, params);
|
||||
storage_type->setCustomization(std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
|
||||
return storage_type;
|
||||
= std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, DataTypes{nested_func->getReturnType()}, parameters);
|
||||
storage_type_arg->setCustomization(std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
|
||||
return storage_type_arg;
|
||||
}
|
||||
|
||||
bool isVersioned() const override
|
||||
|
@ -20,13 +20,12 @@ class AggregateFunctionState final : public IAggregateFunctionHelper<AggregateFu
|
||||
{
|
||||
private:
|
||||
AggregateFunctionPtr nested_func;
|
||||
DataTypes arguments;
|
||||
Array params;
|
||||
|
||||
public:
|
||||
AggregateFunctionState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_)
|
||||
: IAggregateFunctionHelper<AggregateFunctionState>(arguments_, params_)
|
||||
, nested_func(nested_), arguments(arguments_), params(params_) {}
|
||||
, nested_func(nested_)
|
||||
{}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
|
@ -226,7 +226,7 @@ public:
|
||||
{
|
||||
// FIXME why is storing NearestFieldType not enough, and we
|
||||
// have to check for decimals again here?
|
||||
UInt32 scale = static_cast<const ColumnDecimal<T> &>(key_column).getData().getScale();
|
||||
UInt32 scale = static_cast<const ColumnDecimal<T> &>(key_column).getScale();
|
||||
it = merged_maps.find(DecimalField<T>(key, scale));
|
||||
}
|
||||
else
|
||||
@ -251,7 +251,7 @@ public:
|
||||
|
||||
if constexpr (is_decimal<T>)
|
||||
{
|
||||
UInt32 scale = static_cast<const ColumnDecimal<T> &>(key_column).getData().getScale();
|
||||
UInt32 scale = static_cast<const ColumnDecimal<T> &>(key_column).getScale();
|
||||
merged_maps.emplace(DecimalField<T>(key, scale), std::move(new_values));
|
||||
}
|
||||
else
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <base/types.h>
|
||||
#include <base/bit_cast.h>
|
||||
#include <base/sort.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
@ -134,7 +135,7 @@ private:
|
||||
++arr_it;
|
||||
}
|
||||
|
||||
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
|
||||
Float64 threshold = std::ceil(sum_weight * level);
|
||||
Float64 accumulated = 0;
|
||||
@ -175,7 +176,7 @@ private:
|
||||
++arr_it;
|
||||
}
|
||||
|
||||
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
|
||||
size_t level_index = 0;
|
||||
Float64 accumulated = 0;
|
||||
|
@ -88,7 +88,7 @@ struct QuantileExact : QuantileExactBase<Value, QuantileExact<Value>>
|
||||
if (!array.empty())
|
||||
{
|
||||
size_t n = level < 1 ? level * array.size() : (array.size() - 1);
|
||||
nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE: You can think of the radix-select algorithm.
|
||||
::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE: You can think of the radix-select algorithm.
|
||||
return array[n];
|
||||
}
|
||||
|
||||
@ -107,7 +107,7 @@ struct QuantileExact : QuantileExactBase<Value, QuantileExact<Value>>
|
||||
auto level = levels[indices[i]];
|
||||
|
||||
size_t n = level < 1 ? level * array.size() : (array.size() - 1);
|
||||
nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
result[indices[i]] = array[n];
|
||||
prev_n = n;
|
||||
}
|
||||
@ -143,7 +143,7 @@ struct QuantileExactExclusive : public QuantileExact<Value>
|
||||
else if (n < 1)
|
||||
return static_cast<Float64>(array[0]);
|
||||
|
||||
nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
::nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
@ -172,7 +172,7 @@ struct QuantileExactExclusive : public QuantileExact<Value>
|
||||
result[indices[i]] = static_cast<Float64>(array[0]);
|
||||
else
|
||||
{
|
||||
nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
@ -207,7 +207,7 @@ struct QuantileExactInclusive : public QuantileExact<Value>
|
||||
return static_cast<Float64>(array[array.size() - 1]);
|
||||
else if (n < 1)
|
||||
return static_cast<Float64>(array[0]);
|
||||
nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
::nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
@ -234,7 +234,7 @@ struct QuantileExactInclusive : public QuantileExact<Value>
|
||||
result[indices[i]] = static_cast<Float64>(array[0]);
|
||||
else
|
||||
{
|
||||
nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * (static_cast<Float64>(*nth_elem) - array[n - 1]);
|
||||
@ -263,7 +263,7 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
|
||||
if (!array.empty())
|
||||
{
|
||||
// sort inputs in ascending order
|
||||
std::sort(array.begin(), array.end());
|
||||
::sort(array.begin(), array.end());
|
||||
|
||||
// if level is 0.5 then compute the "low" median of the sorted array
|
||||
// by the method of rounding.
|
||||
@ -296,7 +296,7 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
|
||||
if (!array.empty())
|
||||
{
|
||||
// sort inputs in ascending order
|
||||
std::sort(array.begin(), array.end());
|
||||
::sort(array.begin(), array.end());
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto level = levels[indices[i]];
|
||||
@ -345,7 +345,7 @@ struct QuantileExactHigh : public QuantileExactBase<Value, QuantileExactHigh<Val
|
||||
if (!array.empty())
|
||||
{
|
||||
// sort inputs in ascending order
|
||||
std::sort(array.begin(), array.end());
|
||||
::sort(array.begin(), array.end());
|
||||
|
||||
// if level is 0.5 then compute the "high" median of the sorted array
|
||||
// by the method of rounding.
|
||||
@ -370,7 +370,7 @@ struct QuantileExactHigh : public QuantileExactBase<Value, QuantileExactHigh<Val
|
||||
if (!array.empty())
|
||||
{
|
||||
// sort inputs in ascending order
|
||||
std::sort(array.begin(), array.end());
|
||||
::sort(array.begin(), array.end());
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto level = levels[indices[i]];
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
@ -101,7 +103,7 @@ struct QuantileExactWeighted
|
||||
++i;
|
||||
}
|
||||
|
||||
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
|
||||
Float64 threshold = std::ceil(sum_weight * level);
|
||||
Float64 accumulated = 0;
|
||||
@ -151,7 +153,7 @@ struct QuantileExactWeighted
|
||||
++i;
|
||||
}
|
||||
|
||||
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
|
||||
Float64 accumulated = 0;
|
||||
|
||||
|
@ -90,7 +90,7 @@ namespace detail
|
||||
/** This function must be called before get-functions. */
|
||||
void prepare() const
|
||||
{
|
||||
std::sort(elems, elems + count);
|
||||
::sort(elems, elems + count);
|
||||
}
|
||||
|
||||
UInt16 get(double level) const
|
||||
@ -183,7 +183,7 @@ namespace detail
|
||||
|
||||
/// Sorting an array will not be considered a violation of constancy.
|
||||
auto & array = elems;
|
||||
nth_element(array.begin(), array.begin() + n, array.end());
|
||||
::nth_element(array.begin(), array.begin() + n, array.end());
|
||||
quantile = array[n];
|
||||
}
|
||||
|
||||
@ -204,7 +204,7 @@ namespace detail
|
||||
? level * elems.size()
|
||||
: (elems.size() - 1);
|
||||
|
||||
nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
|
||||
result[level_index] = array[n];
|
||||
prev_n = n;
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
@ -64,7 +66,7 @@ struct QuantileLevels
|
||||
permutation[i] = i;
|
||||
}
|
||||
|
||||
std::sort(permutation.begin(), permutation.end(), [this] (size_t a, size_t b) { return levels[a] < levels[b]; });
|
||||
::sort(permutation.begin(), permutation.end(), [this] (size_t a, size_t b) { return levels[a] < levels[b]; });
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <base/types.h>
|
||||
#include <base/sort.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -15,6 +16,7 @@
|
||||
#include <Poco/Exception.h>
|
||||
#include <pcg_random.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
@ -249,7 +251,7 @@ private:
|
||||
if (sorted)
|
||||
return;
|
||||
sorted = true;
|
||||
std::sort(samples.begin(), samples.end(), Comparer());
|
||||
::sort(samples.begin(), samples.end(), Comparer());
|
||||
}
|
||||
|
||||
template <typename ResultType>
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <climits>
|
||||
#include <AggregateFunctions/ReservoirSampler.h>
|
||||
#include <base/types.h>
|
||||
#include <base/sort.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -258,7 +259,8 @@ private:
|
||||
{
|
||||
if (sorted)
|
||||
return;
|
||||
std::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
|
||||
|
||||
::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
|
||||
sorted = true;
|
||||
}
|
||||
|
||||
|
@ -1,13 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
@ -41,7 +45,7 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value
|
||||
/// Save initial positions, then sort indices according to the values.
|
||||
std::vector<size_t> indexes(size);
|
||||
std::iota(indexes.begin(), indexes.end(), 0);
|
||||
std::sort(indexes.begin(), indexes.end(),
|
||||
::sort(indexes.begin(), indexes.end(),
|
||||
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
|
||||
|
||||
size_t left = 0;
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <base/insertAtEnd.h>
|
||||
#include <base/sort.h>
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
@ -632,7 +633,7 @@ BackupEntries makeBackupEntries(const Elements & elements, const ContextPtr & co
|
||||
throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);
|
||||
|
||||
/// Check that all backup entries are unique.
|
||||
std::sort(
|
||||
::sort(
|
||||
backup_entries.begin(),
|
||||
backup_entries.end(),
|
||||
[](const std::pair<String, std::unique_ptr<IBackupEntry>> & lhs, const std::pair<String, std::unique_ptr<IBackupEntry>> & rhs)
|
||||
|
@ -506,6 +506,7 @@ if (ENABLE_NLP)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::bzip2)
|
||||
@ -558,3 +559,4 @@ if (ENABLE_TESTS)
|
||||
|
||||
add_check(unit_tests_dbms)
|
||||
endif ()
|
||||
|
||||
|
@ -48,6 +48,7 @@
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTColumnDeclaration.h>
|
||||
|
||||
#include <Processors/Formats/Impl/NullFormat.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
@ -552,6 +553,25 @@ void ClientBase::initLogsOutputStream()
|
||||
}
|
||||
}
|
||||
|
||||
void ClientBase::updateSuggest(const ASTCreateQuery & ast_create)
|
||||
{
|
||||
std::vector<std::string> new_words;
|
||||
|
||||
if (ast_create.database)
|
||||
new_words.push_back(ast_create.getDatabase());
|
||||
new_words.push_back(ast_create.getTable());
|
||||
|
||||
if (ast_create.columns_list && ast_create.columns_list->columns)
|
||||
{
|
||||
for (const auto & elem : ast_create.columns_list->columns->children)
|
||||
{
|
||||
if (const auto * column = elem->as<ASTColumnDeclaration>())
|
||||
new_words.push_back(column->name);
|
||||
}
|
||||
}
|
||||
|
||||
suggest->addWords(std::move(new_words));
|
||||
}
|
||||
|
||||
void ClientBase::processTextAsSingleQuery(const String & full_query)
|
||||
{
|
||||
@ -565,6 +585,18 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
|
||||
|
||||
String query_to_execute;
|
||||
|
||||
/// Query will be parsed before checking the result because error does not
|
||||
/// always mean a problem, e.g. if the table already exists, and it is not a
|
||||
/// huge problem if suggestion will be added even on error, since this is
|
||||
/// just suggestion.
|
||||
if (auto * create = parsed_query->as<ASTCreateQuery>())
|
||||
{
|
||||
/// Do not update the suggestions until they are ready
|
||||
/// (this will avoid extra complexity)
|
||||
if (suggest)
|
||||
updateSuggest(*create);
|
||||
}
|
||||
|
||||
// An INSERT query may have data that follows the query text. Remove the data:
|
||||
/// Send part of query without data, because data will be sent separately.
|
||||
auto * insert = parsed_query->as<ASTInsertQuery>();
|
||||
@ -1463,7 +1495,6 @@ void ClientBase::runInteractive()
|
||||
/// Initialize DateLUT here to avoid counting time spent here as query execution time.
|
||||
const auto local_tz = DateLUT::instance().getTimeZone();
|
||||
|
||||
std::optional<Suggest> suggest;
|
||||
suggest.emplace();
|
||||
if (load_suggestions)
|
||||
{
|
||||
|
@ -136,6 +136,8 @@ private:
|
||||
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> & external_tables_arguments);
|
||||
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);
|
||||
|
||||
void updateSuggest(const ASTCreateQuery & ast_create);
|
||||
|
||||
protected:
|
||||
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
|
||||
bool is_multiquery = false;
|
||||
@ -144,6 +146,8 @@ protected:
|
||||
bool echo_queries = false; /// Print queries before execution in batch mode.
|
||||
bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode.
|
||||
bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode.
|
||||
|
||||
std::optional<Suggest> suggest;
|
||||
bool load_suggestions = false;
|
||||
|
||||
std::vector<String> queries_files; /// If not empty, queries will be read from these files
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Parsers/Lexer.h>
|
||||
#include <Common/UTF8Helpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -114,6 +113,7 @@ void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors
|
||||
|
||||
{TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)},
|
||||
{TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)},
|
||||
{TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)},
|
||||
{TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)},
|
||||
{TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)},
|
||||
{TokenType::HereDoc, Replxx::Color::CYAN},
|
||||
@ -151,6 +151,11 @@ void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors
|
||||
|
||||
for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken())
|
||||
{
|
||||
if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter)
|
||||
ReplxxLineReader::setLastIsDelimiter(true);
|
||||
else if (token.type != TokenType::Whitespace)
|
||||
ReplxxLineReader::setLastIsDelimiter(false);
|
||||
|
||||
size_t utf8_len = UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(token.begin), token.size());
|
||||
for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index)
|
||||
{
|
||||
|
@ -29,19 +29,21 @@ namespace ErrorCodes
|
||||
Suggest::Suggest()
|
||||
{
|
||||
/// Keywords may not be up to date with the ClickHouse parser.
|
||||
words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT",
|
||||
"MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP",
|
||||
"RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT",
|
||||
"PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
|
||||
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE",
|
||||
"END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES",
|
||||
"SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER",
|
||||
"LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
|
||||
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
|
||||
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
|
||||
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
|
||||
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
|
||||
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE"};
|
||||
addWords({
|
||||
"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT",
|
||||
"MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP",
|
||||
"RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT",
|
||||
"PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
|
||||
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE",
|
||||
"END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES",
|
||||
"SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER",
|
||||
"LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
|
||||
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
|
||||
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
|
||||
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
|
||||
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
|
||||
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE",
|
||||
});
|
||||
}
|
||||
|
||||
static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion)
|
||||
@ -124,18 +126,6 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p
|
||||
}
|
||||
|
||||
/// Note that keyword suggestions are available even if we cannot load data from server.
|
||||
|
||||
std::sort(words.begin(), words.end());
|
||||
words_no_case = words;
|
||||
std::sort(words_no_case.begin(), words_no_case.end(), [](const std::string & str1, const std::string & str2)
|
||||
{
|
||||
return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const char char1, const char char2)
|
||||
{
|
||||
return std::tolower(char1) < std::tolower(char2);
|
||||
});
|
||||
});
|
||||
|
||||
ready = true;
|
||||
});
|
||||
}
|
||||
|
||||
@ -190,8 +180,14 @@ void Suggest::fillWordsFromBlock(const Block & block)
|
||||
const ColumnString & column = typeid_cast<const ColumnString &>(*block.getByPosition(0).column);
|
||||
|
||||
size_t rows = block.rows();
|
||||
|
||||
Words new_words;
|
||||
new_words.reserve(rows);
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
words.emplace_back(column.getDataAt(i).toString());
|
||||
{
|
||||
new_words.emplace_back(column.getDataAt(i).toString());
|
||||
}
|
||||
addWords(std::move(new_words));
|
||||
}
|
||||
|
||||
template
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Poco/String.h>
|
||||
#include <algorithm>
|
||||
#include <base/sort.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -74,10 +75,10 @@ AvailableCollationLocales::LocalesVector AvailableCollationLocales::getAvailable
|
||||
result.push_back(name_and_locale.second);
|
||||
|
||||
auto comparator = [] (const LocaleAndLanguage & f, const LocaleAndLanguage & s)
|
||||
{
|
||||
return f.locale_name < s.locale_name;
|
||||
};
|
||||
std::sort(result.begin(), result.end(), comparator);
|
||||
{
|
||||
return f.locale_name < s.locale_name;
|
||||
};
|
||||
::sort(result.begin(), result.end(), comparator);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -1,5 +1,3 @@
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
@ -9,12 +7,7 @@
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
|
||||
#include <base/unaligned.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -22,6 +15,9 @@
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <base/unaligned.h>
|
||||
#include <base/sort.h>
|
||||
#include <cstring> // memcpy
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -127,18 +123,8 @@ size_t ColumnArray::size() const
|
||||
|
||||
Field ColumnArray::operator[](size_t n) const
|
||||
{
|
||||
size_t offset = offsetAt(n);
|
||||
size_t size = sizeAt(n);
|
||||
|
||||
if (size > max_array_size_as_field)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array of size {} is too large to be manipulated as single field, maximum size {}",
|
||||
size, max_array_size_as_field);
|
||||
|
||||
Array res(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
res[i] = getData()[offset + i];
|
||||
|
||||
Field res;
|
||||
get(n, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -152,11 +138,12 @@ void ColumnArray::get(size_t n, Field & res) const
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array of size {} is too large to be manipulated as single field, maximum size {}",
|
||||
size, max_array_size_as_field);
|
||||
|
||||
res = Array(size);
|
||||
res = Array();
|
||||
Array & res_arr = DB::get<Array &>(res);
|
||||
res_arr.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
getData().get(offset + i, res_arr[i]);
|
||||
res_arr.push_back(getData()[offset + i]);
|
||||
}
|
||||
|
||||
|
||||
@ -824,9 +811,9 @@ void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator
|
||||
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
|
||||
|
||||
if (limit)
|
||||
partial_sort(res.begin(), res.begin() + limit, res.end(), less);
|
||||
::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
|
||||
else
|
||||
std::sort(res.begin(), res.end(), less);
|
||||
::sort(res.begin(), res.end(), less);
|
||||
}
|
||||
|
||||
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <base/sort.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
@ -32,12 +31,6 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
template class DecimalPaddedPODArray<Decimal32>;
|
||||
template class DecimalPaddedPODArray<Decimal64>;
|
||||
template class DecimalPaddedPODArray<Decimal128>;
|
||||
template class DecimalPaddedPODArray<Decimal256>;
|
||||
template class DecimalPaddedPODArray<DateTime64>;
|
||||
|
||||
template <is_decimal T>
|
||||
int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
|
||||
{
|
||||
@ -131,19 +124,6 @@ void ColumnDecimal<T>::updateHashFast(SipHash & hash) const
|
||||
template <is_decimal T>
|
||||
void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn::Permutation & res) const
|
||||
{
|
||||
#if 1 /// TODO: perf test
|
||||
if (data.size() <= std::numeric_limits<UInt32>::max())
|
||||
{
|
||||
PaddedPODArray<UInt32> tmp_res;
|
||||
permutation(reverse, limit, tmp_res);
|
||||
|
||||
res.resize(tmp_res.size());
|
||||
for (size_t i = 0; i < tmp_res.size(); ++i)
|
||||
res[i] = tmp_res[i];
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
permutation(reverse, limit, res);
|
||||
}
|
||||
|
||||
@ -151,7 +131,7 @@ template <is_decimal T>
|
||||
void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
auto equals = [this](size_t lhs, size_t rhs) { return data[lhs] == data[rhs]; };
|
||||
auto sort = [](auto begin, auto end, auto pred) { std::sort(begin, end, pred); };
|
||||
auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); };
|
||||
auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); };
|
||||
|
||||
if (reverse)
|
||||
|
@ -1,66 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <base/sort.h>
|
||||
#include <base/TypeName.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <Core/TypeId.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnVectorHelper.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/IColumnImpl.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <base/sort.h>
|
||||
#include <Core/TypeId.h>
|
||||
#include <base/TypeName.h>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// PaddedPODArray extended by Decimal scale
|
||||
template <typename T>
|
||||
class DecimalPaddedPODArray : public PaddedPODArray<T>
|
||||
{
|
||||
public:
|
||||
using Base = PaddedPODArray<T>;
|
||||
using Base::operator[];
|
||||
|
||||
DecimalPaddedPODArray(size_t size, UInt32 scale_)
|
||||
: Base(size),
|
||||
scale(scale_)
|
||||
{}
|
||||
|
||||
DecimalPaddedPODArray(const DecimalPaddedPODArray & other)
|
||||
: Base(other.begin(), other.end()),
|
||||
scale(other.scale)
|
||||
{}
|
||||
|
||||
DecimalPaddedPODArray(DecimalPaddedPODArray && other)
|
||||
{
|
||||
this->swap(other);
|
||||
std::swap(scale, other.scale);
|
||||
}
|
||||
|
||||
DecimalPaddedPODArray & operator=(DecimalPaddedPODArray && other)
|
||||
{
|
||||
this->swap(other);
|
||||
std::swap(scale, other.scale);
|
||||
return *this;
|
||||
}
|
||||
|
||||
UInt32 getScale() const { return scale; }
|
||||
|
||||
private:
|
||||
UInt32 scale;
|
||||
};
|
||||
|
||||
/// Prevent implicit template instantiation of DecimalPaddedPODArray for common decimal types
|
||||
|
||||
extern template class DecimalPaddedPODArray<Decimal32>;
|
||||
extern template class DecimalPaddedPODArray<Decimal64>;
|
||||
extern template class DecimalPaddedPODArray<Decimal128>;
|
||||
extern template class DecimalPaddedPODArray<Decimal256>;
|
||||
extern template class DecimalPaddedPODArray<DateTime64>;
|
||||
|
||||
/// A ColumnVector for Decimals
|
||||
template <is_decimal T>
|
||||
class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>>
|
||||
@ -72,16 +27,16 @@ private:
|
||||
public:
|
||||
using ValueType = T;
|
||||
using NativeT = typename T::NativeType;
|
||||
using Container = DecimalPaddedPODArray<T>;
|
||||
using Container = PaddedPODArray<T>;
|
||||
|
||||
private:
|
||||
ColumnDecimal(const size_t n, UInt32 scale_)
|
||||
: data(n, scale_),
|
||||
: data(n),
|
||||
scale(scale_)
|
||||
{}
|
||||
|
||||
ColumnDecimal(const ColumnDecimal & src)
|
||||
: data(src.data),
|
||||
: data(src.data.begin(), src.data.end()),
|
||||
scale(src.scale)
|
||||
{}
|
||||
|
||||
@ -195,7 +150,7 @@ public:
|
||||
const T & getElement(size_t n) const { return data[n]; }
|
||||
T & getElement(size_t n) { return data[n]; }
|
||||
|
||||
UInt32 getScale() const {return scale;}
|
||||
UInt32 getScale() const { return scale; }
|
||||
|
||||
protected:
|
||||
Container data;
|
||||
@ -206,17 +161,17 @@ protected:
|
||||
{
|
||||
size_t s = data.size();
|
||||
res.resize(s);
|
||||
for (U i = 0; i < s; ++i)
|
||||
res[i] = i;
|
||||
for (size_t i = 0; i < s; ++i)
|
||||
res[i] = static_cast<U>(i);
|
||||
|
||||
auto sort_end = res.end();
|
||||
if (limit && limit < s)
|
||||
sort_end = res.begin() + limit;
|
||||
|
||||
if (reverse)
|
||||
partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; });
|
||||
::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; });
|
||||
else
|
||||
partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; });
|
||||
::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; });
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -192,9 +192,9 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir
|
||||
else
|
||||
{
|
||||
if (reverse)
|
||||
std::sort(res.begin(), res.end(), greater(*this));
|
||||
::sort(res.begin(), res.end(), greater(*this));
|
||||
else
|
||||
std::sort(res.begin(), res.end(), less(*this));
|
||||
::sort(res.begin(), res.end(), less(*this));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,8 +4,6 @@
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <base/map.h>
|
||||
#include <base/range.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
@ -64,8 +62,9 @@ MutableColumnPtr ColumnMap::cloneResized(size_t new_size) const
|
||||
|
||||
Field ColumnMap::operator[](size_t n) const
|
||||
{
|
||||
auto array = DB::get<Array>((*nested)[n]);
|
||||
return Map(std::make_move_iterator(array.begin()), std::make_move_iterator(array.end()));
|
||||
Field res;
|
||||
get(n, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
void ColumnMap::get(size_t n, Field & res) const
|
||||
@ -74,11 +73,12 @@ void ColumnMap::get(size_t n, Field & res) const
|
||||
size_t offset = offsets[n - 1];
|
||||
size_t size = offsets[n] - offsets[n - 1];
|
||||
|
||||
res = Map(size);
|
||||
res = Map();
|
||||
auto & map = DB::get<Map &>(res);
|
||||
map.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
getNestedData().get(offset + i, map[i]);
|
||||
map.push_back(getNestedData()[offset + i]);
|
||||
}
|
||||
|
||||
bool ColumnMap::isDefaultAt(size_t n) const
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user