Merge master

This commit is contained in:
kssenii 2022-02-07 15:23:10 +01:00
commit 2e58733750
827 changed files with 16784 additions and 5507 deletions

View File

@ -1,8 +1,9 @@
self-hosted-runner:
labels:
- builder
- func-tester
- func-tester-aarch64
- fuzzer-unit-tester
- stress-tester
- style-checker
- func-tester-aarch64
- func-tester
- style-checker-aarch64

View File

@ -10,7 +10,7 @@ on: # yamllint disable-line rule:truthy
- 'backport/**'
jobs:
DockerHubPushAarch64:
runs-on: [self-hosted, func-tester-aarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |

View File

@ -30,7 +30,7 @@ jobs:
python3 run_check.py
DockerHubPushAarch64:
needs: CheckLabels
runs-on: [self-hosted, func-tester-aarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |

View File

@ -20,7 +20,7 @@ on: # yamllint disable-line rule:truthy
workflow_dispatch:
jobs:
DockerHubPushAarch64:
runs-on: [self-hosted, func-tester-aarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |

View File

@ -9,8 +9,20 @@ on: # yamllint disable-line rule:truthy
branches:
- 'master'
jobs:
PythonUnitTests:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 -m unittest discover -s . -p '*_test.py'
DockerHubPushAarch64:
runs-on: [self-hosted, func-tester-aarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |
@ -44,7 +56,7 @@ jobs:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
@ -74,6 +86,7 @@ jobs:
StyleCheck:
needs: DockerHubPush
runs-on: [self-hosted, style-checker]
if: ${{ success() || failure() }}
steps:
- name: Set envs
run: |
@ -81,6 +94,8 @@ jobs:
TEMP_PATH=${{ runner.temp }}/style_check
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
continue-on-error: true
uses: actions/download-artifact@v2
with:
name: changed_images

View File

@ -31,9 +31,22 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 run_check.py
PythonUnitTests:
needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 -m unittest discover -s . -p '*_test.py'
DockerHubPushAarch64:
needs: CheckLabels
runs-on: [self-hosted, func-tester-aarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |
@ -68,7 +81,7 @@ jobs:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
@ -98,6 +111,7 @@ jobs:
StyleCheck:
needs: DockerHubPush
runs-on: [self-hosted, style-checker]
if: ${{ success() || failure() }}
steps:
- name: Set envs
run: |
@ -105,6 +119,8 @@ jobs:
TEMP_PATH=${{ runner.temp }}/style_check
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
continue-on-error: true
uses: actions/download-artifact@v2
with:
name: changed_images

View File

@ -22,7 +22,6 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v2
- name: Download packages and push to Artifactory
env:
run: |
rm -rf "$TEMP_PATH" && mkdir -p "$REPO_COPY"
cp -r "$GITHUB_WORKSPACE" "$REPO_COPY"

View File

@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy
jobs:
DockerHubPushAarch64:
runs-on: [self-hosted, func-tester-aarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |

38
.github/workflows/tags_stable.yml vendored Normal file
View File

@ -0,0 +1,38 @@
name: TagsStableWorkflow
# Runs when a stable or LTS release tag is pushed:
# - checks out master and fetches all tags
# - regenerates utils/list-versions/version_date.tsv
# - opens a pull request (branch auto/<tag>) with the refreshed file
on:  # yamllint disable-line rule:truthy
  push:
    tags:
      - 'v*-stable'
      - 'v*-lts'
jobs:
  UpdateVersions:
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Get tag name
        # Strip the refs/tags/ prefix and export the tag for the later steps.
        run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
          ref: master
      - name: Generate versions
        run: |
          git fetch --tags
          ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v3
        with:
          commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }}
          branch: auto/${{ env.GITHUB_TAG }}
          delete-branch: true
          title: Update version_date.tsv after ${{ env.GITHUB_TAG }}
          body: |
            Update version_date.tsv after ${{ env.GITHUB_TAG }}
            Changelog category (leave one):
            - Not for changelog (changelog entry is not required)

6
.gitmodules vendored
View File

@ -217,6 +217,9 @@
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
[submodule "contrib/cld2"]
path = contrib/cld2
url = https://github.com/ClickHouse-Extras/cld2.git
[submodule "contrib/libstemmer_c"]
path = contrib/libstemmer_c
url = https://github.com/ClickHouse-Extras/libstemmer_c.git
@ -247,6 +250,9 @@
[submodule "contrib/sysroot"]
path = contrib/sysroot
url = https://github.com/ClickHouse-Extras/sysroot.git
[submodule "contrib/nlp-data"]
path = contrib/nlp-data
url = https://github.com/ClickHouse-Extras/nlp-data.git
[submodule "contrib/hive-metastore"]
path = contrib/hive-metastore
url = https://github.com/ClickHouse-Extras/hive-metastore

View File

@ -67,7 +67,7 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURC
message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
endif ()
include (cmake/find/ccache.cmake)
include (cmake/ccache.cmake)
# Take care to add prlimit in command line before ccache, or else ccache thinks that
# prlimit is compiler, and clang++ is its input file, and refuses to work with
@ -104,9 +104,8 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES})
if (NOT MAKE_STATIC_LIBRARIES)
if (NOT USE_STATIC_LIBRARIES)
# DEVELOPER ONLY.
# Faster linking if turned on.
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files")
@ -115,11 +114,11 @@ if (NOT MAKE_STATIC_LIBRARIES)
"Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled")
endif ()
if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without MAKE_STATIC_LIBRARIES=0 has no effect.")
if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without USE_STATIC_LIBRARIES=0 has no effect.")
endif()
if (NOT MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
endif ()
@ -183,7 +182,7 @@ if (COMPILER_CLANG)
if (HAS_USE_CTOR_HOMING)
# For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
endif()
@ -201,21 +200,13 @@ endif ()
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
# Only for Linux, x86_64 or aarch64.
option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON)
elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()
if (GLIBC_COMPATIBILITY)
# NOTE: we may also want to check glibc version and add -include only for 2.32+
# however this is extra complexity, especially for cross compiling.
# And anyway it should not break anything for <2.32.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h")
endif()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
@ -256,8 +247,6 @@ endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set(USE_DEBUG_HELPERS ON)
else ()
set(USE_DEBUG_HELPERS ON)
endif()
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
@ -412,17 +401,6 @@ else ()
option(WERROR "Enable -Werror compiler option" ON)
endif ()
if (WERROR)
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
# Instead, adopt modern cmake usage requirement.
target_compile_options(global-libs INTERFACE "-Werror")
endif ()
# Make this extra-checks for correct library dependencies.
if (OS_LINUX AND NOT SANITIZE)
target_link_options(global-libs INTERFACE "-Wl,--no-undefined")
endif ()
# Increase stack size on Musl. We need a big stack for our recursive-descent parser.
if (USE_MUSL)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152")
@ -430,6 +408,7 @@ endif ()
include(cmake/dbms_glob_sources.cmake)
add_library(global-group INTERFACE)
if (OS_LINUX OR OS_ANDROID)
include(cmake/linux/default_libs.cmake)
elseif (OS_DARWIN)
@ -437,6 +416,18 @@ elseif (OS_DARWIN)
elseif (OS_FREEBSD)
include(cmake/freebsd/default_libs.cmake)
endif ()
link_libraries(global-group)
if (WERROR)
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
# Instead, adopt modern cmake usage requirement.
target_compile_options(global-group INTERFACE "-Werror")
endif ()
# Extra check for correct library dependencies: fail the link on undefined symbols.
if (OS_LINUX AND NOT SANITIZE)
target_link_options(global-group INTERFACE "-Wl,--no-undefined")
endif ()
######################################
### Add targets below this comment ###
@ -444,7 +435,7 @@ endif ()
set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
if (MAKE_STATIC_LIBRARIES)
if (USE_STATIC_LIBRARIES)
set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
if (OS_LINUX AND NOT ARCH_ARM)
# Slightly more efficient code can be generated
@ -480,7 +471,6 @@ endif ()
message (STATUS
"Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES}
SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES}
CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
@ -528,7 +518,7 @@ macro (add_executable target)
# - _je_zone_register due to JEMALLOC_PRIVATE_NAMESPACE=je_ under OS X.
# - but jemalloc-cmake does not run private_namespace.sh
# so symbol name should be _zone_register
if (ENABLE_JEMALLOC AND MAKE_STATIC_LIBRARIES AND OS_DARWIN)
if (ENABLE_JEMALLOC AND USE_STATIC_LIBRARIES AND OS_DARWIN)
set_property(TARGET ${target} APPEND PROPERTY LINK_OPTIONS -u_zone_register)
endif()
endif()

View File

@ -1,4 +1,4 @@
Copyright 2016-2021 ClickHouse, Inc.
Copyright 2016-2022 ClickHouse, Inc.
Apache License
Version 2.0, January 2004
@ -188,7 +188,7 @@ Copyright 2016-2021 ClickHouse, Inc.
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2021 ClickHouse, Inc.
Copyright 2016-2022 ClickHouse, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -22,9 +22,10 @@ The following versions of ClickHouse server are currently being supported with s
| 21.7 | :x: |
| 21.8 | ✅ |
| 21.9 | :x: |
| 21.10 | |
| 21.10 | :x: |
| 21.11 | ✅ |
| 21.12 | ✅ |
| 22.1 | ✅ |
## Reporting a Vulnerability

View File

@ -42,7 +42,7 @@ endif ()
target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..")
if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
target_link_libraries(common PUBLIC -Wl,-U,_inside_main)
endif()

View File

@ -2,7 +2,9 @@
#include <iostream>
#include <string_view>
#include <algorithm>
#include <cassert>
#include <string.h>
#include <unistd.h>
#include <sys/select.h>
@ -34,13 +36,37 @@ bool hasInputData()
return select(1, &fds, nullptr, nullptr, &timeout) == 1;
}
/// Case-insensitive "less than" for words.
/// Used to sort and merge the case-insensitive word list.
struct NoCaseCompare
{
    /// Returns true if str1 orders before str2 when both are compared byte by byte in lower case.
    bool operator()(const std::string & str1, const std::string & str2) const
    {
        return std::lexicographical_compare(
            str1.begin(), str1.end(), str2.begin(), str2.end(),
            [](const char c1, const char c2)
            {
                /// std::tolower is undefined for negative arguments other than EOF, so go through unsigned char.
                /// This also makes bytes >= 0x80 sort after ASCII, the same way strncasecmp orders them
                /// when the sorted list is searched.
                return std::tolower(static_cast<unsigned char>(c1)) < std::tolower(static_cast<unsigned char>(c2));
            });
    }
};
using Words = std::vector<std::string>;

/// Appends `from` to `to`, merges the two parts using `comp`, then drops adjacent equal strings.
/// Both `to` and `from` are expected to be already sorted by `comp`.
/// NOTE(review): deduplication uses operator==, not `comp`, so exact duplicates are removed only
/// when they end up next to each other after the merge.
template <class Compare>
void addNewWords(Words & to, const Words & from, Compare comp)
{
    const size_t sorted_prefix_size = to.size();

    /// Reserve once so that the append below does not reallocate.
    to.reserve(sorted_prefix_size + from.size());
    to.insert(to.end(), from.begin(), from.end());

    /// [begin, begin + sorted_prefix_size) holds the old words, the rest holds the new ones.
    std::inplace_merge(to.begin(), to.begin() + sorted_prefix_size, to.end(), comp);

    to.erase(std::unique(to.begin(), to.end()), to.end());
}
std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const
{
if (!ready)
return std::nullopt;
}
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length)
{
std::string_view last_word;
auto last_word_pos = prefix.find_last_of(word_break_characters);
@ -48,21 +74,45 @@ std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletio
last_word = prefix;
else
last_word = std::string_view(prefix).substr(last_word_pos + 1, std::string::npos);
/// last_word can be empty.
std::pair<Words::const_iterator, Words::const_iterator> range;
std::lock_guard lock(mutex);
/// Only perform case sensitive completion when the prefix string contains any uppercase characters
if (std::none_of(prefix.begin(), prefix.end(), [&](auto c) { return c >= 'A' && c <= 'Z'; }))
return std::equal_range(
if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast<wint_t>(x)); }))
range = std::equal_range(
words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
{
return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0;
});
else
return std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
{
return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0;
});
return replxx::Replxx::completions_t(range.first, range.second);
}
/// Adds new completion candidates to both word lists: the case-sensitive one and the case-insensitive one.
/// The incoming words are sorted outside the lock, then merged into the existing lists under the mutex.
void LineReader::Suggest::addWords(Words && new_words)
{
    Words new_words_no_case = new_words;
    if (!new_words.empty())
    {
        std::sort(new_words.begin(), new_words.end());
        std::sort(new_words_no_case.begin(), new_words_no_case.end(), NoCaseCompare{});
    }

    std::lock_guard lock(mutex);
    addNewWords(words, new_words, std::less<std::string>{});
    addNewWords(words_no_case, new_words_no_case, NoCaseCompare{});

    /// Check the invariants while still holding the mutex: reading the lists after unlocking
    /// would race with another thread that is adding words at the same time.
    assert(std::is_sorted(words.begin(), words.end()));
    assert(std::is_sorted(words_no_case.begin(), words_no_case.end(), NoCaseCompare{}));
}
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)

View File

@ -1,10 +1,12 @@
#pragma once
#include <base/types.h>
#include <mutex>
#include <atomic>
#include <vector>
#include <optional>
#include <replxx.hxx>
#include <base/types.h>
class LineReader
{
@ -12,14 +14,16 @@ public:
struct Suggest
{
using Words = std::vector<std::string>;
using WordsRange = std::pair<Words::const_iterator, Words::const_iterator>;
/// Get vector for the matched range of words if any.
replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length);
void addWords(Words && new_words);
private:
Words words;
Words words_no_case;
std::atomic<bool> ready{false};
/// Get iterators for the matched range of words if any.
std::optional<WordsRange> getCompletions(const String & prefix, size_t prefix_length) const;
std::mutex mutex;
};
using Patterns = std::vector<const char *>;

View File

@ -25,13 +25,6 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
}
/// Check if string ends with given character after skipping whitespaces.
bool ends_with(const std::string_view & s, const std::string_view & p)
{
auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }));
return ss.ends_with(p);
}
std::string getEditor()
{
const char * editor = std::getenv("EDITOR");
@ -132,8 +125,14 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
}
/// Whether the last token of the current input is a delimiter.
/// Written through ReplxxLineReader::setLastIsDelimiter and consulted by the <ENTER> handler
/// to choose between starting a new line and committing the input.
static bool replxx_last_is_delimiter = false;
/// Stores `flag` in the file-level replxx_last_is_delimiter variable.
void ReplxxLineReader::setLastIsDelimiter(bool flag)
{
replxx_last_is_delimiter = flag;
}
ReplxxLineReader::ReplxxLineReader(
const Suggest & suggest,
Suggest & suggest,
const String & history_file_path_,
bool multiline_,
Patterns extenders_,
@ -179,14 +178,13 @@ ReplxxLineReader::ReplxxLineReader(
auto callback = [&suggest] (const String & context, size_t context_size)
{
if (auto range = suggest.getCompletions(context, context_size))
return Replxx::completions_t(range->first, range->second);
return Replxx::completions_t();
return suggest.getCompletions(context, context_size);
};
rx.set_completion_callback(callback);
rx.set_complete_on_empty(false);
rx.set_word_break_characters(word_break_characters);
rx.set_ignore_case(true);
if (highlighter)
rx.set_highlighter_callback(highlighter);
@ -198,21 +196,11 @@ ReplxxLineReader::ReplxxLineReader(
auto commit_action = [this](char32_t code)
{
std::string_view str = rx.get_state().text();
/// Always commit line when we see extender at the end. It will start a new prompt.
for (const auto * extender : extenders)
if (ends_with(str, extender))
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
/// If we see an delimiter at the end, commit right away.
for (const auto * delimiter : delimiters)
if (ends_with(str, delimiter))
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
/// If we allow multiline and there is already something in the input, start a newline.
if (multiline && !input.empty())
/// NOTE: Lexer is only available if we use highlighter.
if (highlighter && multiline && !replxx_last_is_delimiter)
return rx.invoke(Replxx::ACTION::NEW_LINE, code);
replxx_last_is_delimiter = false;
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
/// bind C-j to ENTER action.

View File

@ -9,7 +9,7 @@ class ReplxxLineReader : public LineReader
{
public:
ReplxxLineReader(
const Suggest & suggest,
Suggest & suggest,
const String & history_file_path,
bool multiline,
Patterns extenders_,
@ -19,6 +19,9 @@ public:
void enableBracketedPaste() override;
/// If highlight is on, we will set a flag to denote whether the last token is a delimiter.
/// This is useful to determine the behavior of <ENTER> key when multiline is enabled.
static void setLastIsDelimiter(bool flag);
private:
InputStatus readOneLine(const String & prompt) override;
void addToHistory(const String & line) override;

View File

@ -12,6 +12,8 @@ namespace
{
template <typename... Ts> constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); }
template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...) { return std::forward<T>(x); }
/// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor
template <typename T, typename... Ts> constexpr auto firstArg(fmt::basic_runtime<T> && data, Ts &&...) { return data.str.data(); }
}

View File

@ -1,26 +1,42 @@
#pragma once
#include <pdqsort.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#include <miniselect/floyd_rivest_select.h>
template <class RandomIt>
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
::miniselect::floyd_rivest_select(first, nth, last);
}
template <class RandomIt>
template <typename RandomIt>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
{
::miniselect::floyd_rivest_partial_sort(first, middle, last);
}
template <class RandomIt, class Compare>
template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{
::miniselect::floyd_rivest_partial_sort(first, middle, last, compare);
}
#pragma GCC diagnostic pop
/// Sorts the range [first, last) with pdqsort, ordering elements by `compare`.
template <typename RandomIt, typename Compare>
void sort(RandomIt first, RandomIt last, Compare compare)
{
    ::pdqsort(first, last, compare);
}

/// Sorts the range [first, last) with pdqsort, ordering elements by std::less of the element type.
template <typename RandomIt>
void sort(RandomIt first, RandomIt last)
{
    using Element = typename std::iterator_traits<RandomIt>::value_type;
    ::pdqsort(first, last, std::less<Element>());
}

View File

@ -79,18 +79,14 @@ static void call_default_signal_handler(int sig)
raise(sig);
}
static constexpr size_t max_query_id_size = 127;
static const size_t signal_pipe_buf_size =
sizeof(int)
+ sizeof(siginfo_t)
+ sizeof(ucontext_t)
+ sizeof(ucontext_t*)
+ sizeof(StackTrace)
+ sizeof(UInt32)
+ max_query_id_size + 1 /// query_id + varint encoded length
+ sizeof(void*);
using signal_function = void(int, siginfo_t*, void*);
static void writeSignalIDtoSignalPipe(int sig)
@ -129,18 +125,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
const ucontext_t signal_context = *reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(signal_context);
StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe.
query_id.size = std::min(query_id.size, max_query_id_size);
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(*signal_context);
DB::writeBinary(sig, out);
DB::writePODBinary(*info, out);
DB::writePODBinary(signal_context, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
@ -184,6 +176,8 @@ public:
void run() override
{
static_assert(PIPE_BUF >= 512);
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
char buf[signal_pipe_buf_size];
DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
@ -227,10 +221,9 @@ public:
else
{
siginfo_t info{};
ucontext_t context{};
ucontext_t * context{};
StackTrace stack_trace(NoCapture{});
UInt32 thread_num{};
std::string query_id;
DB::ThreadStatus * thread_ptr{};
if (sig != SanitizerTrap)
@ -241,12 +234,11 @@ public:
DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in);
DB::readBinary(query_id, in);
DB::readPODBinary(thread_ptr, in);
/// This allows to receive more signals if failure happens inside onFault function.
/// Example: segfault while symbolizing stack trace.
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, thread_ptr); }).detach();
}
}
}
@ -279,18 +271,27 @@ private:
void onFault(
int sig,
const siginfo_t & info,
const ucontext_t & context,
ucontext_t * context,
const StackTrace & stack_trace,
UInt32 thread_num,
const std::string & query_id,
DB::ThreadStatus * thread_ptr) const
{
DB::ThreadStatus thread_status;
String query_id;
String query;
/// Send logs from this thread to client if possible.
/// It will allow client to see failure messages directly.
if (thread_ptr)
{
query_id = thread_ptr->getQueryId().toString();
if (auto thread_group = thread_ptr->getThreadGroup())
{
query = thread_group->query;
}
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
@ -305,19 +306,19 @@ private:
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, query_id, strsignal(sig), sig);
thread_num, query_id, query, strsignal(sig), sig);
}
String error_message;
if (sig != SanitizerTrap)
error_message = signalToErrorMessage(sig, info, context);
error_message = signalToErrorMessage(sig, info, *context);
else
error_message = "Sanitizer trap.";
LOG_FATAL(log, error_message);
LOG_FATAL(log, fmt::runtime(error_message));
if (stack_trace.getSize())
{
@ -330,11 +331,11 @@ private:
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
LOG_FATAL(log, bare_stacktrace.str());
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
}
/// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, fmt::runtime(s)); });
#if defined(OS_LINUX)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
@ -389,20 +390,16 @@ static void sanitizerDeathCallback()
const StackTrace stack_trace;
StringRef query_id = DB::CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, max_query_id_size);
int sig = SignalListener::SanitizerTrap;
DB::writeBinary(sig, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
/// The time that is usually enough for separate thread to print info into log.
sleepForSeconds(10);
sleepForSeconds(20);
}
#endif

View File

@ -6,7 +6,7 @@ add_library (daemon
target_include_directories (daemon PUBLIC ..)
if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
endif()

View File

@ -37,7 +37,7 @@ if (GLIBC_COMPATIBILITY)
target_include_directories(glibc-compatibility PRIVATE libcxxabi ${musl_arch_include_dir})
if (NOT USE_STATIC_LIBRARIES AND NOT MAKE_STATIC_LIBRARIES)
# Compile glibc-compatibility with -fPIC when static libraries are not used.
if (NOT USE_STATIC_LIBRARIES)
    target_compile_options(glibc-compatibility PRIVATE -fPIC)
endif ()

View File

@ -1,50 +0,0 @@
/// In glibc 2.32 new version of some symbols had been added [1]:
///
/// $ nm -D clickhouse | fgrep -e @GLIBC_2.32
/// U pthread_getattr_np@GLIBC_2.32
/// U pthread_sigmask@GLIBC_2.32
///
/// [1]: https://www.spinics.net/lists/fedora-devel/msg273044.html
///
/// Right now ubuntu 20.04 is used as official image for building
/// ClickHouse, however once it will be switched someone may not be happy
/// with that fact that he/she cannot use official binaries anymore because
/// they have glibc < 2.32.
///
/// To avoid this dependency, let's force previous version of those
/// symbols from glibc.
///
/// Also note, that the following approach had been tested:
/// a) -Wl,--wrap -- but it goes into endless recursion whey you try to do
/// something like this:
///
/// int __pthread_getattr_np_compact(pthread_t thread, pthread_attr_t *attr);
/// GLIBC_COMPAT_SYMBOL(__pthread_getattr_np_compact, pthread_getattr_np)
/// int __pthread_getattr_np_compact(pthread_t thread, pthread_attr_t *attr);
/// int __wrap_pthread_getattr_np(pthread_t thread, pthread_attr_t *attr)
/// {
/// return __pthread_getattr_np_compact(thread, attr);
/// }
///
/// int __pthread_sigmask_compact(int how, const sigset_t *set, sigset_t *oldset);
/// GLIBC_COMPAT_SYMBOL(__pthread_sigmask_compact, pthread_sigmask)
/// int __pthread_sigmask_compact(int how, const sigset_t *set, sigset_t *oldset);
/// int __wrap_pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset)
/// {
/// return __pthread_sigmask_compact(how, set, oldset);
/// }
///
/// b) -Wl,--defsym -- same problems (and you cannot use version of symbol with
/// version in the expression)
/// c) this approach -- simply add this file with -include directive.
#if defined(__amd64__)
#define GLIBC_COMPAT_SYMBOL(func) __asm__(".symver " #func "," #func "@GLIBC_2.2.5");
#elif defined(__aarch64__)
#define GLIBC_COMPAT_SYMBOL(func) __asm__(".symver " #func "," #func "@GLIBC_2.17");
#else
#error Your platform is not supported.
#endif
GLIBC_COMPAT_SYMBOL(pthread_sigmask)
GLIBC_COMPAT_SYMBOL(pthread_getattr_np)

View File

@ -22,16 +22,12 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include (cmake/find/cxx.cmake)
add_library(global-group INTERFACE)
include (cmake/cxx.cmake)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
)
link_libraries(global-group)
# FIXME: remove when all contribs will get custom cmake lists
install(
TARGETS global-group global-libs

View File

@ -22,17 +22,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include (cmake/find/unwind.cmake)
include (cmake/find/cxx.cmake)
add_library(global-group INTERFACE)
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
)
link_libraries(global-group)
# FIXME: remove when all contribs will get custom cmake lists
install(
TARGETS global-group global-libs

View File

@ -42,18 +42,15 @@ if (NOT OS_ANDROID)
add_subdirectory(base/harmful)
endif ()
include (cmake/find/unwind.cmake)
include (cmake/find/cxx.cmake)
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
add_library(global-group INTERFACE)
target_link_libraries(global-group INTERFACE
-Wl,--start-group
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
-Wl,--end-group
)
link_libraries(global-group)
# FIXME: remove when all contribs will get custom cmake lists
install(
TARGETS global-group global-libs

View File

@ -23,7 +23,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
endif ()
if (COMPILER_GCC)
@ -48,7 +48,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
endif ()
@ -69,7 +69,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
endif ()
if (COMPILER_GCC)
@ -101,7 +101,7 @@ if (SANITIZE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
endif ()
if (COMPILER_GCC)

View File

@ -140,6 +140,8 @@ if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c)
add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2)
endif()
add_contrib (sqlite-cmake sqlite-amalgamation)

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit c2043aa250e53ad5cf75e596e319d587af4dcb3c
Subproject commit 1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d

View File

@ -29,12 +29,6 @@ if (OS_FREEBSD)
message (FATAL_ERROR "Using internal parquet library on FreeBSD is not supported")
endif()
if(MAKE_STATIC_LIBRARIES)
set(FLATBUFFERS_LIBRARY flatbuffers)
else()
set(FLATBUFFERS_LIBRARY flatbuffers_shared)
endif()
set (CMAKE_CXX_STANDARD 17)
set(ARROW_VERSION "6.0.1")
@ -84,7 +78,7 @@ set(FLATBUFFERS_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/flatbuffers")
set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_SRC_DIR}/include")
# set flatbuffers CMake options
if (MAKE_STATIC_LIBRARIES)
if (USE_STATIC_LIBRARIES)
set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library")
set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library")
else ()
@ -95,9 +89,16 @@ set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}")
add_library(_flatbuffers INTERFACE)
if(USE_STATIC_LIBRARIES)
target_link_libraries(_flatbuffers INTERFACE flatbuffers)
else()
target_link_libraries(_flatbuffers INTERFACE flatbuffers_shared)
endif()
target_include_directories(_flatbuffers INTERFACE ${FLATBUFFERS_INCLUDE_DIR})
# === hdfs
# NOTE: cannot use ch_contrib::hdfs since it's INCLUDE_DIRECTORIES does not includes trailing "hdfs/"
set(HDFS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/")
# arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features.
@ -123,8 +124,6 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
@ -151,6 +150,22 @@ set(ORC_SRCS
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
)
add_library(_orc ${ORC_SRCS})
target_link_libraries(_orc PRIVATE
ch_contrib::protobuf
ch_contrib::lz4
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd)
target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR})
target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_BUILD_INCLUDE_DIR})
target_include_directories(_orc SYSTEM PRIVATE
${ORC_SOURCE_SRC_DIR}
${ORC_SOURCE_WRAP_DIR}
${ORC_BUILD_SRC_DIR}
${ORC_ADDITION_SOURCE_DIR}
${ARROW_SRC_DIR})
# === arrow
@ -336,7 +351,8 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/ipc/reader.cc"
"${LIBRARY_DIR}/ipc/writer.cc"
${ORC_SRCS}
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
)
add_definitions(-DARROW_WITH_LZ4)
@ -356,30 +372,27 @@ endif ()
add_library(_arrow ${ARROW_SRCS})
# Arrow dependencies
add_dependencies(_arrow ${FLATBUFFERS_LIBRARY})
target_link_libraries(_arrow PRIVATE
boost::filesystem
target_link_libraries(_arrow PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)
_flatbuffers
ch_contrib::double_conversion
ch_contrib::lz4
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::zstd
)
target_link_libraries(_arrow PUBLIC _orc)
add_dependencies(_arrow protoc)
target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ARROW_SRC_DIR})
target_include_directories(_arrow SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/cpp/src")
target_link_libraries(_arrow PRIVATE ch_contrib::double_conversion)
target_link_libraries(_arrow PRIVATE ch_contrib::protobuf)
target_link_libraries(_arrow PRIVATE ch_contrib::lz4)
target_link_libraries(_arrow PRIVATE ch_contrib::snappy)
target_link_libraries(_arrow PRIVATE ch_contrib::zlib)
target_link_libraries(_arrow PRIVATE ch_contrib::zstd)
target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR})
target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ORC_BUILD_INCLUDE_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${ORC_SOURCE_SRC_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${ORC_SOURCE_WRAP_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${ORC_BUILD_SRC_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${ORC_ADDITION_SOURCE_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${ARROW_SRC_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${FLATBUFFERS_INCLUDE_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${HDFS_INCLUDE_DIR})
# === parquet

1
contrib/cld2 vendored Submodule

@ -0,0 +1 @@
Subproject commit bc6d493a2f64ed1fc1c4c4b4294a542a04e04217

View File

@ -0,0 +1,33 @@
# Build definition for the vendored cld2 sources located in contrib/cld2.
# Compiles the .cc files listed below into the `_cld2` library target and
# exposes that target under the namespaced alias `ch_contrib::cld2`.
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cld2")
# Every translation unit compiled into _cld2; all of them live in ${LIBRARY_DIR}/internal.
set (SRCS
"${LIBRARY_DIR}/internal/cldutil.cc"
"${LIBRARY_DIR}/internal/compact_lang_det.cc"
"${LIBRARY_DIR}/internal/cldutil_shared.cc"
"${LIBRARY_DIR}/internal/compact_lang_det_hint_code.cc"
"${LIBRARY_DIR}/internal/compact_lang_det_impl.cc"
"${LIBRARY_DIR}/internal/debug.cc"
"${LIBRARY_DIR}/internal/fixunicodevalue.cc"
"${LIBRARY_DIR}/internal/generated_entities.cc"
"${LIBRARY_DIR}/internal/generated_language.cc"
"${LIBRARY_DIR}/internal/generated_ulscript.cc"
"${LIBRARY_DIR}/internal/getonescriptspan.cc"
"${LIBRARY_DIR}/internal/lang_script.cc"
"${LIBRARY_DIR}/internal/offsetmap.cc"
"${LIBRARY_DIR}/internal/scoreonescriptspan.cc"
"${LIBRARY_DIR}/internal/tote.cc"
"${LIBRARY_DIR}/internal/utf8statetable.cc"
"${LIBRARY_DIR}/internal/cld_generated_cjk_uni_prop_80.cc"
"${LIBRARY_DIR}/internal/cld2_generated_cjk_compatible.cc"
"${LIBRARY_DIR}/internal/cld_generated_cjk_delta_bi_4.cc"
"${LIBRARY_DIR}/internal/generated_distinct_bi_0.cc"
"${LIBRARY_DIR}/internal/cld2_generated_quadchrome_2.cc"
"${LIBRARY_DIR}/internal/cld2_generated_deltaoctachrome.cc"
"${LIBRARY_DIR}/internal/cld2_generated_distinctoctachrome.cc"
"${LIBRARY_DIR}/internal/cld_generated_score_quad_octa_2.cc"
)
# No STATIC/SHARED keyword is given, so the library type follows CMake's BUILD_SHARED_LIBS setting.
add_library(_cld2 ${SRCS})
# Always compile these sources as position-independent code.
set_property(TARGET _cld2 PROPERTY POSITION_INDEPENDENT_CODE ON)
# Disable these two warnings for the cld2 sources only (PRIVATE: not propagated to consumers).
target_compile_options (_cld2 PRIVATE -Wno-reserved-id-macro -Wno-c++11-narrowing)
# Consumers get the public/ headers as SYSTEM includes, prepended (BEFORE) to their include path.
target_include_directories(_cld2 SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/public")
# Namespaced alias for _cld2.
add_library(ch_contrib::cld2 ALIAS _cld2)

2
contrib/fmtlib vendored

@ -1 +1 @@
Subproject commit c108ee1d590089ccf642fc85652b845924067af2
Subproject commit b6f4ceaed0a0a24ccf575fab6c56dd50ccf6f1a9

View File

@ -1,7 +1,10 @@
set (SRCS
# NOTE: do not build module for now:
# ../fmtlib/src/fmt.cc
../fmtlib/src/format.cc
../fmtlib/src/os.cc
../fmtlib/include/fmt/args.h
../fmtlib/include/fmt/chrono.h
../fmtlib/include/fmt/color.h
../fmtlib/include/fmt/compile.h
@ -11,9 +14,9 @@ set (SRCS
../fmtlib/include/fmt/locale.h
../fmtlib/include/fmt/os.h
../fmtlib/include/fmt/ostream.h
../fmtlib/include/fmt/posix.h
../fmtlib/include/fmt/printf.h
../fmtlib/include/fmt/ranges.h
../fmtlib/include/fmt/xchar.h
)
add_library(_fmt ${SRCS})

View File

@ -46,7 +46,7 @@ set(_gRPC_SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL)
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
# Choose to build static or shared library for c-ares.
if (MAKE_STATIC_LIBRARIES)
if (USE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()

View File

@ -133,7 +133,7 @@ add_library(ch_contrib::uv ALIAS _uv)
target_compile_definitions(_uv PRIVATE ${uv_defines})
target_include_directories(_uv SYSTEM PUBLIC ${SOURCE_DIR}/include PRIVATE ${SOURCE_DIR}/src)
target_link_libraries(_uv ${uv_libraries})
if (NOT MAKE_STATIC_LIBRARIES)
if (NOT USE_STATIC_LIBRARIES)
target_compile_definitions(_uv
INTERFACE USING_UV_SHARED=1
PRIVATE BUILDING_UV_SHARED=1)

2
contrib/lz4 vendored

@ -1 +1 @@
Subproject commit f39b79fb02962a1cd880bbdecb6dffba4f754a11
Subproject commit 4c9431e9af596af0556e5da0ae99305bafb2b10b

1
contrib/nlp-data vendored Submodule

@ -0,0 +1 @@
Subproject commit 5591f91f5e748cba8fb9ef81564176feae774853

View File

@ -0,0 +1,15 @@
# Build definition for the contrib/nlp-data resources: the .zst files listed
# below are handed to clickhouse_embed_binaries() and the result is exposed to
# the build as `ch_contrib::nlp_data`.
# NOTE(review): clickhouse_embed_binaries() is presumably defined by the
# embed_binary.cmake file included here — confirm.
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
# INTERFACE library: it has no sources of its own and only carries usage requirements.
add_library (_nlp_data INTERFACE)
# Requests a target named `nlp_dictionaries` built from three resource files in ${LIBRARY_DIR}.
clickhouse_embed_binaries(
TARGET nlp_dictionaries
RESOURCE_DIR "${LIBRARY_DIR}"
RESOURCES charset.zst tonality_ru.zst programming.zst
)
# Build nlp_dictionaries before _nlp_data.
add_dependencies(_nlp_data nlp_dictionaries)
# Link the nlp_dictionaries file between the ${WHOLE_ARCHIVE} / ${NO_WHOLE_ARCHIVE} linker flags.
# NOTE(review): presumably this keeps the embedded objects from being dropped by the
# linker even though nothing references them directly — confirm.
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
# Namespaced alias for _nlp_data.
add_library(ch_contrib::nlp_data ALIAS _nlp_data)

2
contrib/orc vendored

@ -1 +1 @@
Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1
Subproject commit f9a393ed2433a60034795284f82d093b348f2102

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit f019cba7ea1bcd1b4feb7826f28ed57fb581b04c
Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d

View File

@ -72,11 +72,6 @@ else()
if(WITH_ZSTD)
add_definitions(-DZSTD)
include_directories(${ZSTD_INCLUDE_DIR})
include_directories("${ZSTD_INCLUDE_DIR}/common")
include_directories("${ZSTD_INCLUDE_DIR}/dictBuilder")
include_directories("${ZSTD_INCLUDE_DIR}/deprecated")
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
endif()

View File

@ -16,6 +16,8 @@ Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

View File

@ -65,7 +65,12 @@ do
# check if variable not empty
[ -z "$dir" ] && continue
# ensure directories exist
if ! mkdir -p "$dir"; then
if [ "$DO_CHOWN" = "1" ]; then
mkdir="mkdir"
else
mkdir="$gosu mkdir"
fi
if ! $mkdir -p "$dir"; then
echo "Couldn't create necessary directory: $dir"
exit 1
fi

View File

@ -12,7 +12,11 @@ dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb
if [[ -n "$TEST_CASES_FROM_DEB" ]] && [[ "$TEST_CASES_FROM_DEB" -eq 1 ]]; then
dpkg -i package_folder/clickhouse-test_*.deb
else
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
fi
# install test configs
/usr/share/clickhouse-test/config/install.sh

View File

@ -11,6 +11,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
curl \
git \
libxml2-utils \
moreutils \
pylint \
python3-pip \
shellcheck \

View File

@ -10,72 +10,26 @@ def process_result(result_folder):
status = "success"
description = ""
test_results = []
checks = (
("header duplicates", "duplicate_output.txt"),
("shellcheck", "shellcheck_output.txt"),
("style", "style_output.txt"),
("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"),
)
duplicate_log_path = "{}/duplicate_output.txt".format(result_folder)
if not os.path.exists(duplicate_log_path):
logging.info("No header duplicates check log on path %s", duplicate_log_path)
return "exception", "No header duplicates check log", []
elif os.stat(duplicate_log_path).st_size != 0:
description += " Header duplicates check failed. "
test_results.append(("Header duplicates check", "FAIL"))
status = "failure"
else:
test_results.append(("Header duplicates check", "OK"))
shellcheck_log_path = "{}/shellcheck_output.txt".format(result_folder)
if not os.path.exists(shellcheck_log_path):
logging.info("No shellcheck log on path %s", shellcheck_log_path)
return "exception", "No shellcheck log", []
elif os.stat(shellcheck_log_path).st_size != 0:
description += " Shellcheck check failed. "
test_results.append(("Shellcheck ", "FAIL"))
status = "failure"
else:
test_results.append(("Shellcheck", "OK"))
style_log_path = "{}/style_output.txt".format(result_folder)
if not os.path.exists(style_log_path):
logging.info("No style check log on path %s", style_log_path)
return "exception", "No style check log", []
elif os.stat(style_log_path).st_size != 0:
description += "Style check failed. "
test_results.append(("Style check", "FAIL"))
status = "failure"
else:
test_results.append(("Style check", "OK"))
typos_log_path = "{}/typos_output.txt".format(result_folder)
if not os.path.exists(typos_log_path):
logging.info("No typos check log on path %s", typos_log_path)
return "exception", "No typos check log", []
elif os.stat(typos_log_path).st_size != 0:
description += "Typos check failed. "
test_results.append(("Typos check", "FAIL"))
status = "failure"
else:
test_results.append(("Typos check", "OK"))
whitespaces_log_path = "{}/whitespaces_output.txt".format(result_folder)
if not os.path.exists(whitespaces_log_path):
logging.info("No whitespaces check log on path %s", whitespaces_log_path)
return "exception", "No whitespaces check log", []
elif os.stat(whitespaces_log_path).st_size != 0:
description += "Whitespaces check failed. "
test_results.append(("Whitespaces check", "FAIL"))
status = "failure"
else:
test_results.append(("Whitespaces check", "OK"))
workflows_log_path = "{}/workflows_output.txt".format(result_folder)
if not os.path.exists(workflows_log_path):
logging.info("No workflows check log on path %s", style_log_path)
return "exception", "No workflows check log", []
elif os.stat(whitespaces_log_path).st_size != 0:
description += "Workflows check failed. "
test_results.append(("Workflows check", "FAIL"))
status = "failure"
else:
test_results.append(("Workflows check", "OK"))
for name, out_file in checks:
full_path = os.path.join(result_folder, out_file)
if not os.path.exists(full_path):
logging.info("No %s check log on path %s", name, full_path)
return "exception", f"No {name} check log", []
elif os.stat(full_path).st_size != 0:
description += f"Check {name} failed. "
test_results.append((f"Check {name}", "FAIL"))
status = "failure"
else:
test_results.append((f"Check {name}", "OK"))
if not description:
description += "Style check success"

View File

@ -3,10 +3,16 @@
# yaml check is not the best one
# If the check-style directory is missing, record a failure status for the whole run.
cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
# Each step below prints a progress line through `ts` (timestamp prefix) and
# tees the check's combined stdout/stderr (|&) into its own file under /test_output.
echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
# Progress label fixed: it previously read "Check sorkflows".
echo "Check workflows" | ts
./check-workflows |& tee /test_output/workflows_output.txt
echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
# Summarize the per-check outputs; if the summarizer itself fails, record a failure status.
/process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -22,7 +22,7 @@ cmake .. \
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
3. Libraries finders (search for contrib libraries, located in `/contrib/*/CMakeLists.txt`).
3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`)
## List of CMake flags

View File

@ -8,4 +8,4 @@ sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client
clickhouse-client # or "clickhouse-client --password" if you set up a password.

View File

@ -4,4 +4,4 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.re
sudo yum install clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
clickhouse-client
clickhouse-client # or "clickhouse-client --password" if you set up a password.

View File

@ -125,10 +125,6 @@ For installing CMake and Ninja on Mac OS X first install Homebrew and then insta
Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/.
## Optional External Libraries {#optional-external-libraries}
ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`.
## C++ Compiler {#c-compiler}
The Clang compiler, starting from version 11, is supported for building ClickHouse.

View File

@ -78,15 +78,21 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) |
| TIME | [Int64](../../sql-reference/data-types/int-uint.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
| GEOMETRY | [String](../../sql-reference/data-types/string.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) |
| SET | [UInt64](../../sql-reference/data-types/int-uint.md) |
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
## Specifics and Recommendations {#specifics-and-recommendations}

View File

@ -97,13 +97,16 @@ Structure of the `patterns` section:
``` text
pattern
rule_type
regexp
function
pattern
rule_type
regexp
age + precision
...
pattern
rule_type
regexp
function
age + precision
@ -127,12 +130,20 @@ When processing a row, ClickHouse checks the rules in the `pattern` sections. Ea
Fields for `pattern` and `default` sections:
- `regexp` A pattern for the metric name.
- `rule_type` - a rule's type. It's applied only to a particular kind of metrics. The engine uses it to separate plain and tagged metrics. Optional parameter. Default value: `all`.
It's unnecessary when performance is not critical, or when only one metrics type is used, e.g. plain metrics. By default, only one set of rules is created. Otherwise, if any of the special types is defined, two different sets are created: one for plain metrics (root.branch.leaf) and one for tagged metrics (root.branch.leaf;tag1=value1).
The default rules end up in both sets.
Valid values:
- `all` (default) - a universal rule, used when `rule_type` is omitted.
- `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression.
- `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression.
- `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
- `regexp` A pattern for the metric name (a regular expression or DSL).
- `age` The minimum age of the data in seconds.
- `precision` How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
- `function` The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages.
### Configuration Example {#configuration-example}
### Configuration Example without rules types {#configuration-example}
``` xml
<graphite_rollup>
@ -167,6 +178,81 @@ Fields for `pattern` and `default` sections:
</graphite_rollup>
```
### Configuration Example with rules types {#configuration-typed-example}
``` xml
<graphite_rollup>
<version_column_name>Version</version_column_name>
<pattern>
<rule_type>plain</rule_type>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<pattern>
<rule_type>tagged</rule_type>
<regexp>^((.*)|.)min\?</regexp>
<function>min</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<pattern>
<rule_type>tagged</rule_type>
<regexp><![CDATA[^someName\?(.*&)*tag1=value1(&|$)]]></regexp>
<function>min</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<pattern>
<rule_type>tag_list</rule_type>
<regexp>someName;tag2=value2</regexp>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup>
```
!!! warning "Warning"
Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).

View File

@ -54,10 +54,8 @@ If the set of columns in the Buffer table does not match the set of columns in a
If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared.
The same thing happens if the subordinate table does not exist when the buffer is flushed.
If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again.
!!! attention "Attention"
Running ALTER on the Buffer table in releases made before 28 Sep 2020 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
If the server is restarted abnormally, the data in the buffer is lost.

View File

@ -25,6 +25,7 @@ Categories:
- **[Operations](../faq/operations/index.md)**
- [Which ClickHouse version to use in production?](../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md)
- [Does ClickHouse support multi-region replication?](../faq/operations/multi-region-replication.md)
- **[Integration](../faq/integration/index.md)**
- [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md)

View File

@ -23,11 +23,13 @@ Web UI can be accessed here: `http://localhost:8123/play`.
![Web UI](../images/play.png)
In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13.
In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay.
``` bash
$ curl 'http://localhost:8123/ping'
Ok.
$ curl 'http://localhost:8123/replicas_status'
Ok.
```
Send the request as a URL query parameter, or as a POST. Or send the beginning of the query in the query parameter, and the rest in the POST (we'll explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries.

View File

@ -27,6 +27,7 @@ toc_title: Client Libraries
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)
- [chconn](https://github.com/vahid-sohrabloo/chconn)
- [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse)
- [golang-clickhouse](https://github.com/leprosus/golang-clickhouse)
- Swift

View File

@ -129,6 +129,10 @@ If you want to divide an existing ZooKeeper cluster into two, the correct way is
Do not run ZooKeeper on the same servers as ClickHouse, because ZooKeeper is very sensitive to latency and ClickHouse may utilize all available system resources.
You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers.
Do not change `minSessionTimeout` setting, large values may affect ClickHouse restart stability.
With the default settings, ZooKeeper is a time bomb:
> The ZooKeeper server won't delete files from old snapshots and logs when using the default configuration (see autopurge), and this is the responsibility of the operator.

View File

@ -10,7 +10,7 @@ Applies Student's t-test to samples from two populations.
**Syntax**
``` sql
studentTTest(sample_data, sample_index)
studentTTest([confidence_level])(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
@ -21,12 +21,19 @@ The null hypothesis is that means of populations are equal. Normal distribution
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Parameters**
- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
[Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified):
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md).
- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md).
**Example**

View File

@ -10,7 +10,7 @@ Applies Welch's t-test to samples from two populations.
**Syntax**
``` sql
welchTTest(sample_data, sample_index)
welchTTest([confidence_level])(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
@ -21,12 +21,18 @@ The null hypothesis is that means of populations are equal. Normal distribution
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Parameters**
- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
[Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified):
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md).
- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md).
**Example**

View File

@ -159,7 +159,7 @@ Configuration fields:
| Tag | Description | Required |
|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
| `name` | Column name. | Yes |
| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. 
In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. 
For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
| `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).<br/><br/>Default value: `false`. | No |

View File

@ -46,7 +46,7 @@ CHECK TABLE test_table;
└───────────┴───────────┴─────────┘
```
If `check_query_single_value_result` = 0, the `CHECK TABLE` query shows the general table check status.
If `check_query_single_value_result` = 1, the `CHECK TABLE` query shows the general table check status.
```sql
SET check_query_single_value_result = 1;

View File

@ -252,7 +252,6 @@ CREATE TABLE codec_example
ENGINE = MergeTree()
```
### Encryption Codecs {#create-query-encryption-codecs}
These codecs don't actually compress data, but instead encrypt data on disk. These are only available when an encryption key is specified by [encryption](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption) settings. Note that encryption only makes sense at the end of codec pipelines, because encrypted data usually can't be compressed in any meaningful way.
@ -260,6 +259,7 @@ These codecs don't actually compress data, but instead encrypt data on disk. The
Encryption codecs:
- `CODEC('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode.
- `CODEC('AES-256-GCM-SIV')` — Encrypts data with AES-256 in GCM-SIV mode.
These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content).
@ -269,7 +269,7 @@ These codecs use a fixed nonce and encryption is therefore deterministic. This m
!!! attention "Attention"
If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging.
**Example**
```sql

View File

@ -43,7 +43,7 @@ User host is a host from which a connection to ClickHouse server could be establ
- `HOST ANY` — User can connect from any location. This is a default option.
- `HOST LOCAL` — User can connect only locally.
- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`.
- `HOST NAME REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST NAME REGEXP '.*\.mysite\.com'`.
- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`.
- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain.
Another way of specifying host is to use `@` syntax following the username. Examples:

View File

@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_
`WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings.
When `FROM const_expr` not defined sequence of filling use minimal `expr` field value from `ORDER BY`.
When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type and as `seconds` for DateTime type.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
Example of a query without `WITH FILL`:
@ -402,4 +402,85 @@ Result:
└────────────┴────────────┴──────────┘
```
The following query uses the `INTERVAL` data type of 1 day for each data filled on column `d1`:
``` sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d1 WITH FILL STEP INTERVAL 1 DAY,
d2 WITH FILL;
```
Result:
```
┌─────────d1─┬─────────d2─┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-12 │ 1970-01-01 │ │
│ 1970-01-13 │ 1970-01-01 │ │
│ 1970-01-14 │ 1970-01-01 │ │
│ 1970-01-15 │ 1970-01-01 │ │
│ 1970-01-16 │ 1970-01-01 │ │
│ 1970-01-17 │ 1970-01-01 │ │
│ 1970-01-18 │ 1970-01-01 │ │
│ 1970-01-19 │ 1970-01-01 │ │
│ 1970-01-20 │ 1970-01-01 │ │
│ 1970-01-21 │ 1970-01-01 │ │
│ 1970-01-22 │ 1970-01-01 │ │
│ 1970-01-23 │ 1970-01-01 │ │
│ 1970-01-24 │ 1970-01-01 │ │
│ 1970-01-25 │ 1970-01-01 │ │
│ 1970-01-26 │ 1970-01-01 │ │
│ 1970-01-27 │ 1970-01-01 │ │
│ 1970-01-28 │ 1970-01-01 │ │
│ 1970-01-29 │ 1970-01-01 │ │
│ 1970-01-30 │ 1970-01-01 │ │
│ 1970-01-31 │ 1970-01-01 │ │
│ 1970-02-01 │ 1970-01-01 │ │
│ 1970-02-02 │ 1970-01-01 │ │
│ 1970-02-03 │ 1970-01-01 │ │
│ 1970-02-04 │ 1970-01-01 │ │
│ 1970-02-05 │ 1970-01-01 │ │
│ 1970-02-06 │ 1970-01-01 │ │
│ 1970-02-07 │ 1970-01-01 │ │
│ 1970-02-08 │ 1970-01-01 │ │
│ 1970-02-09 │ 1970-01-01 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-02-11 │ 1970-01-01 │ │
│ 1970-02-12 │ 1970-01-01 │ │
│ 1970-02-13 │ 1970-01-01 │ │
│ 1970-02-14 │ 1970-01-01 │ │
│ 1970-02-15 │ 1970-01-01 │ │
│ 1970-02-16 │ 1970-01-01 │ │
│ 1970-02-17 │ 1970-01-01 │ │
│ 1970-02-18 │ 1970-01-01 │ │
│ 1970-02-19 │ 1970-01-01 │ │
│ 1970-02-20 │ 1970-01-01 │ │
│ 1970-02-21 │ 1970-01-01 │ │
│ 1970-02-22 │ 1970-01-01 │ │
│ 1970-02-23 │ 1970-01-01 │ │
│ 1970-02-24 │ 1970-01-01 │ │
│ 1970-02-25 │ 1970-01-01 │ │
│ 1970-02-26 │ 1970-01-01 │ │
│ 1970-02-27 │ 1970-01-01 │ │
│ 1970-02-28 │ 1970-01-01 │ │
│ 1970-03-01 │ 1970-01-01 │ │
│ 1970-03-02 │ 1970-01-01 │ │
│ 1970-03-03 │ 1970-01-01 │ │
│ 1970-03-04 │ 1970-01-01 │ │
│ 1970-03-05 │ 1970-01-01 │ │
│ 1970-03-06 │ 1970-01-01 │ │
│ 1970-03-07 │ 1970-01-01 │ │
│ 1970-03-08 │ 1970-01-01 │ │
│ 1970-03-09 │ 1970-01-01 │ │
│ 1970-03-10 │ 1970-01-01 │ │
│ 1970-03-11 │ 1970-01-01 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/) <!--hide-->

View File

@ -72,7 +72,7 @@ Reloads all [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-mo
**Syntax**
```sql
SYSTEM RELOAD MODELS
SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
```
## RELOAD MODEL {#query_language-system-reload-model}
@ -82,7 +82,7 @@ Completely reloads a CatBoost model `model_name` if the configuration was update
**Syntax**
```sql
SYSTEM RELOAD MODEL <model_name>
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_name>
```
## RELOAD FUNCTIONS {#query_language-system-reload-functions}
@ -92,8 +92,8 @@ Reloads all registered [executable user defined functions](../functions/index.md
**Syntax**
```sql
RELOAD FUNCTIONS
RELOAD FUNCTION function_name
RELOAD FUNCTIONS [ON CLUSTER cluster_name]
RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
```
## DROP DNS CACHE {#query_language-system-drop-dns-cache}

View File

@ -3,14 +3,14 @@ toc_priority: 53
toc_title: USE
---
# USE 语句 {#use}
# USE Statement {#use}
``` sql
USE db
```
用于设置会话的当前数据库。
Lets you set the current database for the session.
如果查询语句中没有在表名前面以加点的方式指明数据库名, 则用当前数据库进行搜索。
The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name.
使用 HTTP 协议时无法进行此查询,因为没有会话的概念。
This query can't be made when using the HTTP protocol, since there is no concept of a session.

View File

@ -30,7 +30,7 @@ There may be any number of space symbols between syntactical constructions (incl
ClickHouse supports both SQL-style and C-style comments:
- SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
- SQL-style comments start with `--`, `#!` or `# ` and continue to the end of the line, a space after `--` and `#!` can be omitted.
- C-style comments are from `/*` to `*/` and can be multiline, spaces are not required either.
## Keywords {#syntax-keywords}
@ -106,9 +106,9 @@ In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/
### Heredoc {#heredeoc}
A [heredoc](https://en.wikipedia.org/wiki/Here_document) is a way to define a string (often multiline), while maintaining the original formatting. A heredoc is defined as a custom string literal, placed between two `$` symbols, for example `$heredoc$`. A value between two heredocs is processed "as-is".
A [heredoc](https://en.wikipedia.org/wiki/Here_document) is a way to define a string (often multiline), while maintaining the original formatting. A heredoc is defined as a custom string literal, placed between two `$` symbols, for example `$heredoc$`. A value between two heredocs is processed "as-is".
You can use a heredoc to embed snippets of SQL, HTML, or XML code, etc.
You can use a heredoc to embed snippets of SQL, HTML, or XML code, etc.
**Example**

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

1
docs/ko/images/logo.svg Normal file
View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="54" height="48" markdown="1" viewBox="0 0 9 8"><style>.o{fill:#fc0}.r{fill:red}</style><path d="M0,7 h1 v1 h-1 z" class="r"/><path d="M0,0 h1 v7 h-1 z" class="o"/><path d="M2,0 h1 v8 h-1 z" class="o"/><path d="M4,0 h1 v8 h-1 z" class="o"/><path d="M6,0 h1 v8 h-1 z" class="o"/><path d="M8,3.25 h1 v1.5 h-1 z" class="o"/></svg>

After

Width:  |  Height:  |  Size: 373 B

BIN
docs/ko/images/play.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

94
docs/ko/index.md Normal file
View File

@ -0,0 +1,94 @@
---
toc_priority: 0
toc_title: 목차
---
# ClickHouse란? {#what-is-clickhouse}
ClickHouse® 는 query의 온라인 분석 처리(OLAP)를 위한 열 지향(column-oriented) 데이터베이스 관리 시스템(DBMS)입니다.
"보통의" 행 지향(row-oriented) DBMS에서는 데이터가 다음과 같은 순서로 저장됩니다.
| row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|-----|-------------|------------|--------------------|-----------|---------------------|
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
| #N | … | … | … | … | … |
즉, 행과 관련된 모든 값들은 물리적으로 나란히 저장됩니다.
행 지향(row-oriented) DBMS의 예시로는 MySQL, Postgres, 그리고 MS SQL 서버 등이 있습니다.
열 지향 (column-oriented) DBMS에서는 데이터가 아래와 같은 방식으로 저장됩니다:
| Row: | #0 | #1 | #2 | #N |
|-------------|---------------------|---------------------|---------------------|-----|
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
| JavaEnable: | 1 | 0 | 1 | … |
| Title: | Investor Relations | Contact us | Mission | … |
| GoodEvent: | 1 | 1 | 1 | … |
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
이 예에서는 데이터가 정렬된 순서만을 보여줍니다. 다른 열의 값들은 서로 분리되어 저장되고, 같은 열의 정보들은 함께 저장됩니다.
열 지향(column-oriented) DBMS 의 종류는 Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, 그리고 kdb+ 등이 있습니다.
데이터를 저장하기 위한 서로 다른 순서는 다른 시나리오에 더 적합합니다. 데이터 접근 시나리오는 쿼리가 수행되는 빈도, 비율 및 비율을 나타내거나, 각 쿼리 유형(행, 열 및 바이트)에 대해 읽은 데이터의 양 데이터 읽기와 업데이트 사이의 관계, 데이터의 작업 크기 및 로컬에서 사용되는 방법 트랜잭션이 사용되는지 여부, 트랜잭션이 얼마나 격리되어 있는지, 데이터 복제 및 논리적 무결성에 대한 요구 사항, 각 쿼리 유형에 대한 대기 시간 및 처리량 요구 사항 등이 있습니다.
시스템의 부하가 높을수록 사용 시나리오의 요구 사항에 맞게 시스템 설정을 사용자 지정하는 것이 더 중요하며 이 사용자 지정은 더욱 세분화됩니다. 상당히 다른 시나리오에 똑같이 적합한 시스템은 없습니다. 만약 높은 부하에서 시스템이 넓은 시나리오 집합에 대해 적응한다면 시스템은 모든 시나리오를 모두 제대로 처리하지 못하거나 가능한 시나리오 중 하나 또는 몇 개에 대해서만 잘 작동할 것입니다.
## OLAP 시나리오의 중요 속성들 {#key-properties-of-olap-scenario}
- 요청(request)의 대부분은 읽기 접근에 관한 것입니다.
- 데이터는 단일 행이 아니라 상당히 큰 일괄 처리(\> 1000개 행)로 업데이트됩니다. 또는 전혀 업데이트되지 않습니다.
- 데이터는 DB에 추가되지만 수정되지는 않습니다.
- 읽기의 경우 DB에서 상당히 많은 수의 행이 추출되지만 열은 일부만 추출됩니다.
- 테이블은 "넓습니다". 이는 열의 수가 많다는 것을 의미합니다.
- 쿼리는 상대적으로 드뭅니다(일반적으로 서버당 수백 또는 초당 쿼리 미만).
- 간단한 쿼리의 경우 약 50ms의 대기 시간이 허용됩니다.
- 열 값은 숫자와 짧은 문자열(예: URL당 60바이트)과 같이 상당히 작습니다.
- 단일 쿼리를 처리할 때 높은 처리량이 필요합니다(서버당 초당 최대 수십억 행).
- 트랜잭션이 필요하지 않습니다.
- 데이터 일관성에 대한 요구 사항이 낮습니다.
- 쿼리당 하나의 큰 테이블이 존재하고 하나를 제외한 모든 테이블은 작습니다.
- 쿼리 결과가 원본 데이터보다 훨씬 작습니다. 즉, 데이터가 필터링되거나 집계되므로 결과가 단일 서버의 RAM에 꼭 들어맞습니다.
OLAP 시나리오가 다른 일반적인 시나리오(OLTP 또는 키-값 액세스와 같은)와 매우 다르다는 것을 쉽게 알 수 있습니다. 따라서 적절한 성능을 얻으려면 분석 쿼리를 처리하기 위해 OLTP 또는 키-값 DB를 사용하는 것은 의미가 없습니다. 예를 들어 분석에 MongoDB나 Redis를 사용하려고 하면 OLAP 데이터베이스에 비해 성능이 매우 저하됩니다.
## 왜 열 지향 데이터베이스가 OLAP 시나리오에 적합한가 {#why-column-oriented-databases-work-better-in-the-olap-scenario}
열 지향(column-oriented) 데이터베이스는 OLAP 시나리오에 더 적합합니다. 대부분의 쿼리를 처리하는 데 있어서 행 지향(row-oriented) 데이터베이스보다 100배 이상 빠릅니다. 그 이유는 아래에 자세히 설명되어 있지만 사실은 시각적으로 더 쉽게 설명할 수 있습니다.
**행 지향 DBMS**
![Row-oriented](images/row-oriented.gif#)
**열 지향 DBMS**
![Column-oriented](images/column-oriented.gif#)
차이가 보이시나요?
### 입출력 {#inputoutput}
1. 분석 쿼리의 경우 적은 수의 테이블 열만 읽어야 합니다. 열 지향 데이터베이스에서는 필요한 데이터만 읽을 수 있습니다. 예를 들어 100개 중 5개의 열이 필요한 경우 I/O가 20배 감소할 것으로 예상할 수 있습니다.
2. 데이터는 패킷으로 읽히므로 압축하기가 더 쉽습니다. 열의 데이터도 압축하기 쉽습니다. 이것은 I/O의 볼륨을 더욱 감소시킵니다.
3. 감소된 I/O로 인해 시스템 캐시에 더 많은 데이터가 들어갑니다.
예를 들어, "각 광고 플랫폼에 대한 레코드 수 계산" 쿼리는 압축되지 않은 1바이트를 차지하는 하나의 "광고 플랫폼 ID" 열을 읽어야 합니다. 트래픽의 대부분이 광고 플랫폼에서 발생하지 않은 경우 이 열의 최소 10배 압축을 기대할 수 있습니다. 빠른 압축 알고리즘을 사용하면 초당 최소 몇 기가바이트의 압축되지 않은 데이터의 속도로 데이터 압축 해제가 가능합니다. 즉, 이 쿼리는 단일 서버에서 초당 약 수십억 행의 속도로 처리될 수 있습니다. 이 속도는 정말 실제로 달성됩니다.
### CPU {#cpu}
쿼리를 수행하려면 많은 행을 처리해야 하므로 별도의 행이 아닌 전체 벡터에 대한 모든 연산을 디스패치하거나 쿼리 엔진을 구현하여 디스패치 비용이 거의 들지 않습니다. 반쯤 괜찮은 디스크 하위 시스템에서 이렇게 하지 않으면 쿼리 인터프리터가 불가피하게 CPU를 정지시킵니다. 데이터를 열에 저장하고 가능한 경우 열별로 처리하는 것이 좋습니다.
이를 수행하기위한 두가지 방법이 있습니다.
1. 벡터 엔진. 모든 연산은 별도의 값 대신 벡터에 대해 작성됩니다. 즉, 작업을 자주 호출할 필요가 없으며 파견 비용도 무시할 수 있습니다. 작업 코드에는 최적화된 내부 주기가 포함되어 있습니다.
2. 코드 생성. 쿼리에 대해 생성된 코드에는 모든 간접 호출이 있습니다.
이것은 단순한 쿼리를 실행할 때 의미가 없기 때문에 "일반" 데이터베이스에서는 수행되지 않습니다. 그러나 예외가 있습니다. 예를 들어 MemSQL은 코드 생성을 사용하여 SQL 쿼리를 처리할 때 대기 시간을 줄입니다. (비교되게, 분석 DBMS는 대기 시간이 아닌 처리량 최적화가 필요합니다.)
CPU 효율성을 위해 쿼리 언어는 선언적(SQL 또는 MDX)이거나 최소한 벡터(J, K)여야 합니다. 쿼리는 최적화를 허용하는 암시적 루프만 포함해야 합니다.
{## [원문](https://clickhouse.com/docs/en/) ##}

View File

@ -2,8 +2,13 @@
toc_priority: 65
toc_title: Сборка на Mac OS X
---
# Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
!!! info "Вам не нужно собирать ClickHouse самостоятельно"
Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start).
Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`.
Сборка должна запускаться с x86_64 (Intel) на macOS версии 10.15 (Catalina) и выше в последней версии компилятора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компиляторах.
## Установка Homebrew {#install-homebrew}

View File

@ -99,13 +99,16 @@ patterns
``` text
pattern
rule_type
regexp
function
pattern
rule_type
regexp
age + precision
...
pattern
rule_type
regexp
function
age + precision
@ -129,12 +132,20 @@ default
Поля для разделов `pattern` и `default`:
- `regexp` шаблон имени метрики.
- `rule_type` - тип правила (применяется только к метрикам указанных типов), используется для разделения правил проверки плоских/теггированных метрик. Опциональное поле. Значение по умолчанию: `all`.
Если используются метрики только одного типа или производительность проверки правил некритична, можно не использовать. По умолчанию создается только один тип правил для проверки. Иначе, если хотя бы для одного правила указано отличное от умолчания значение, создаются 2 независимых типа правил - для обычных (классические root.branch.leaf) и теггированных метрик (root.branch.leaf;tag1=value1).
Правила по умолчанию попадают в наборы правил обоих типов.
Возможные значения:
- `all` (default) - универсальное правило, назначается также по умолчанию, если поле не задано
- `plain` - правило для плоских метрик (без тегов). Поле `regexp` обрабатывается как регулярное выражение.
- `tagged` - правило для теггированных метрик (метрика хранится в БД в формате `someName?tag1=value1&tag2=value2&tag3=value3`), регулярное выражение должно быть отсортировано по именам тегов, первым - значение тега `__name__`, если есть. Поле `regexp` обрабатывается как регулярное выражение.
- `tag_list` - правило для теггированных метрик, простой DSL для упрощения задания регулярного выражения в формате тегов graphite `someName;tag1=value1;tag2=value2`, `someName` или `tag1=value1;tag2=value2`. Поле `regexp` транслируется в правило `tagged`. Сортировать по именам тегов не обязательно, оно отсортируется автоматически. Значение тега (но не имя) может быть регулярным выражением (например `env=(dev|staging)`).
- `regexp` шаблон имени метрики (регулярное выражение или DSL).
- `age` минимальный возраст данных в секундах.
- `precision` точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
- `function` имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
### Пример конфигурации {#configuration-example}
### Пример конфигурации без разделения типа правил {#configuration-example}
``` xml
<graphite_rollup>
@ -169,6 +180,80 @@ default
</graphite_rollup>
```
### Пример конфигурации с разделением типа правил {#configuration-typed-example}
``` xml
<graphite_rollup>
<version_column_name>Version</version_column_name>
<pattern>
<rule_type>plain</rule_type>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<pattern>
<rule_type>tagged</rule_type>
<regexp>^((.*)|.)min\?</regexp>
<function>min</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<pattern>
<rule_type>tagged</rule_type>
<regexp><![CDATA[^someName\?(.*&)*tag1=value1(&|$)]]></regexp>
<function>min</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<pattern>
<rule_type>tag_list</rule_type>
<regexp>someName;tag2=value2</regexp>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup>
```
!!! warning "Внимание"
Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).

View File

@ -48,10 +48,8 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен.
То же самое происходит, если подчинённая таблица не существует в момент сброса буфера.
Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а после создать таблицу Buffer заново.
!!! attention "Внимание"
В релизах до 28 сентября 2020 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны.

View File

@ -105,7 +105,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex from_env="KEY"></key_hex>
<key_hex from_env="ENVVAR"></key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
```
@ -118,7 +118,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex id="0">00112233445566778899aabbccddeeff</key_hex>
<key_hex id="1" from_env=".."></key_hex>
<key_hex id="1" from_env="ENVVAR"></key_hex>
<current_key_id>1</current_key_id>
</aes_128_gcm_siv>
</encryption_codecs>

View File

@ -4,11 +4,11 @@ toc_title: "Функции для работы с индексами H3"
# Функции для работы с индексами H3 {#h3index}
[H3](https://eng.uber.com/h3/) — это система геокодирования, которая делит поверхность Земли на равные шестигранные ячейки. Система поддерживает иерархию (вложенность) ячеек, т.е. каждый "родительский" шестигранник может быть поделен на семь одинаковых вложенных "дочерних" шестигранников, и так далее.
[H3](https://eng.uber.com/h3/) — это система геокодирования, которая делит поверхность Земли на равные шестиугольные ячейки. Система поддерживает иерархию (вложенность) ячеек, т.е. каждый "родительский" шестиугольник может быть поделен на семь одинаковых вложенных "дочерних" шестиугольников, и так далее.
Уровень вложенности называется "разрешением" и может принимать значение от `0` до `15`, где `0` соответствует "базовым" ячейкам самого верхнего уровня (наиболее крупным).
Для каждой точки, имеющей широту и долготу, можно получить 64-битный индекс H3, соответствующий номеру шестигранной ячейки, где эта точка находится.
Для каждой точки, имеющей широту и долготу, можно получить 64-битный индекс H3, соответствующий номеру шестиугольной ячейки, где эта точка находится.
Индексы H3 используются, в основном, для геопозиционирования и расчета расстояний.
@ -24,7 +24,7 @@ h3IsValid(h3index)
**Параметр**
- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `h3index` — идентификатор шестиугольника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
@ -61,7 +61,7 @@ h3GetResolution(h3index)
**Параметр**
- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `h3index` — идентификатор шестиугольника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
@ -88,7 +88,7 @@ SELECT h3GetResolution(639821929606596015) AS resolution;
## h3EdgeAngle {#h3edgeangle}
Рассчитывает средний размер стороны шестигранника [H3](#h3index) в градусах.
Рассчитывает средний размер стороны шестиугольника [H3](#h3index) в градусах.
**Синтаксис**
@ -102,7 +102,7 @@ h3EdgeAngle(resolution)
**Возвращаемое значение**
- Средняя длина стороны шестигранника [H3](#h3index) в градусах. Тип данных: [Float64](../../../sql-reference/data-types/float.md).
- Средняя длина стороны шестиугольника [H3](#h3index) в градусах. Тип данных: [Float64](../../../sql-reference/data-types/float.md).
**Пример**
@ -122,7 +122,7 @@ SELECT h3EdgeAngle(10) AS edgeAngle;
## h3EdgeLengthM {#h3edgelengthm}
Рассчитывает средний размер стороны шестигранника [H3](#h3index) в метрах.
Рассчитывает средний размер стороны шестиугольника [H3](#h3index) в метрах.
**Синтаксис**
@ -136,7 +136,7 @@ h3EdgeLengthM(resolution)
**Возвращаемое значение**
- Средняя длина стороны шестигранника H3 в метрах, тип — [Float64](../../../sql-reference/data-types/float.md).
- Средняя длина стороны шестиугольника H3 в метрах, тип — [Float64](../../../sql-reference/data-types/float.md).
**Пример**
@ -172,7 +172,7 @@ geoToH3(lon, lat, resolution)
**Возвращаемые значения**
- Порядковый номер шестигранника.
- Порядковый номер шестиугольника.
- 0 в случае ошибки.
Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
@ -195,7 +195,7 @@ SELECT geoToH3(37.79506683, 55.71290588, 15) AS h3Index;
## h3ToGeo {#h3togeo}
Возвращает географические координаты долготы и широты центра шестигранника, соответствующие указанному [H3](#h3index)-индексу.
Возвращает географические координаты долготы и широты центра шестиугольника, соответствующие указанному [H3](#h3index)-индексу.
**Синтаксис**
@ -265,7 +265,7 @@ SELECT h3ToGeoBoundary(644325524701193974) AS coordinates;
## h3kRing {#h3kring}
Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке.
Возвращает [H3](#h3index)-индексы шестиугольников в радиусе `k` от данного в произвольном порядке.
**Синтаксис**
@ -275,7 +275,7 @@ h3kRing(h3index, k)
**Аргументы**
- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `h3index` — идентификатор шестиугольника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `k` — радиус. Тип данных: [целое число](../../../sql-reference/data-types/int-uint.md)
**Возвращаемые значения**
@ -607,7 +607,7 @@ h3IsResClassIII(index)
**Параметр**
- `index` — порядковый номер шестигранника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `index` — порядковый номер шестиугольника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
@ -644,7 +644,7 @@ h3IsPentagon(index)
**Параметр**
- `index` — порядковый номер шестигранника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `index` — порядковый номер шестиугольника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**

View File

@ -246,6 +246,46 @@ CREATE TABLE codec_example
ENGINE = MergeTree()
```
### Кодеки шифрования {#create-query-encryption-codecs}
Эти кодеки не сжимают данные, вместо этого они зашифровывают данные на диске. Воспользоваться кодеками можно, только когда ключ шифрования задан параметрами [шифрования](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption). Обратите внимание: ставить кодеки шифрования имеет смысл в самый конец цепочки кодеков, потому что зашифрованные данные, как правило, нельзя сжать релевантным образом.
Кодеки шифрования:
- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV.
- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV.
Эти кодеки используют фиксированное одноразовое число (nonce), поэтому шифрование является детерминированным. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у такого шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, шифротекст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит).
!!! attention "Внимание"
Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст.
!!! attention "Внимание"
Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование.
**Пример**
```sql
CREATE TABLE mytable
(
x String Codec(AES_128_GCM_SIV)
)
ENGINE = MergeTree ORDER BY x;
```
!!! note "Замечание"
Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных.
**Пример**
```sql
CREATE TABLE mytable
(
x String Codec(Delta, LZ4, AES_128_GCM_SIV)
)
ENGINE = MergeTree ORDER BY x;
```
## Временные таблицы {#temporary-tables}
ClickHouse поддерживает временные таблицы со следующими характеристиками:

View File

@ -43,7 +43,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
- `HOST ANY` — Пользователь может подключиться с любого хоста. Используется по умолчанию.
- `HOST LOCAL` — Пользователь может подключиться только локально.
- `HOST NAME 'fqdn'` — Хост задается через FQDN. Например, `HOST NAME 'mysite.com'`.
- `HOST NAME REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST NAME REGEXP '.*\.mysite\.com'`.
- `HOST REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST REGEXP '.*\.mysite\.com'`.
- `HOST LIKE 'template'` — Позволяет использовать оператор [LIKE](../../functions/string-search-functions.md#function-like) для фильтрации хостов. Например, `HOST LIKE '%'` эквивалентен `HOST ANY`; `HOST LIKE '%.mysite.com'` разрешает подключение со всех хостов в домене `mysite.com`.
Также, чтобы задать хост, вы можете использовать `@` вместе с именем пользователя. Примеры:

View File

@ -28,7 +28,7 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
## Комментарии {#comments}
Поддерживаются комментарии в SQL-стиле и C-стиле.
Комментарии в SQL-стиле: от `--` до конца строки. Пробел после `--` может не ставиться.
Комментарии в SQL-стиле: от `--`, `#!` или `# ` до конца строки. Пробел после `--` и `#!` может не ставиться.
Комментарии в C-стиле: от `/*` до `*/`. Такие комментарии могут быть многострочными. Пробелы тоже не обязательны.
## Ключевые слова {#syntax-keywords}
@ -104,9 +104,9 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
### Heredoc {#heredeoc}
Синтаксис [heredoc](https://ru.wikipedia.org/wiki/Heredoc-синтаксис) — это способ определения строк с сохранением исходного формата (часто с переносом строки). `Heredoc` задается как произвольный строковый литерал между двумя символами `$`, например `$heredoc$`. Значение между двумя `heredoc` обрабатывается "как есть".
Синтаксис [heredoc](https://ru.wikipedia.org/wiki/Heredoc-синтаксис) — это способ определения строк с сохранением исходного формата (часто с переносом строки). `Heredoc` задается как произвольный строковый литерал между двумя символами `$`, например `$heredoc$`. Значение между двумя `heredoc` обрабатывается "как есть".
Синтаксис `heredoc` часто используют для вставки кусков кода SQL, HTML, XML и т.п.
Синтаксис `heredoc` часто используют для вставки кусков кода SQL, HTML, XML и т.п.
**Пример**

View File

@ -62,7 +62,7 @@ def build_for_lang(lang, args):
strict=True,
theme=theme_cfg,
nav=blog_nav,
copyright='©20162021 ClickHouse, Inc.',
copyright='©20162022 ClickHouse, Inc.',
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',
@ -97,10 +97,6 @@ def build_for_lang(lang, args):
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
f.write(rss_template.render({'config': raw_config}))
# TODO: AMP for blog
# if not args.skip_amp:
# amp.build_amp(lang, args, cfg)
logging.info(f'Finished building {lang} blog')
except exceptions.ConfigurationError as e:

View File

@ -90,7 +90,10 @@ def concatenate(lang, docs_path, single_page_file, nav):
line)
# If failed to replace the relative link, print to log
if '../' in line:
# But with some exceptions:
# - "../src/" -- for cmake-in-clickhouse.md (link to sources)
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
logging.info('Failed to resolve relative link:')
logging.info(path)
logging.info(line)

View File

@ -26,4 +26,6 @@ toc_title: Introduction
- [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md)
[来源文章](https://clickhouse.com/docs/en/database_engines/) <!--hide-->

View File

@ -1 +0,0 @@
../../../en/engines/database-engines/sqlite.md

View File

@ -0,0 +1,80 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
允许连接到[SQLite](https://www.sqlite.org/index.html)数据库，并支持ClickHouse和SQLite交换数据，执行 `INSERT` 和 `SELECT` 查询。
## 创建一个数据库 {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**引擎参数**
- `db_path` — SQLite 数据库文件的路径.
## 数据类型的支持 {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## 技术细节和建议 {#specifics-and-recommendations}
SQLite将整个数据库(定义、表、索引和数据本身)存储为主机上的单个跨平台文件。在写入过程中，SQLite会锁定整个数据库文件，因此写入操作是顺序执行的。读操作可以是多任务的。
SQLite不需要服务管理(如启动脚本)或基于`GRANT`和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。
## 使用示例 {#usage-example}
数据库在ClickHouse连接到SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
展示数据表中的内容:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
从ClickHouse表插入数据到SQLite表:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -1 +0,0 @@
../../../en/faq/general/columnar-database.md

View File

@ -0,0 +1,25 @@
---
title: 什么是列存储数据库?
toc_hidden: true
toc_priority: 101
---
# 什么是列存储数据库? {#what-is-a-columnar-database}
列存储数据库独立存储每个列的数据。这样就可以只从磁盘读取任何给定查询中用到的列的数据。其代价是，影响整行的操作会按比例变得更昂贵。列存储数据库的同义词是面向列的数据库管理系统。ClickHouse就是这样一个典型的例子。
列存储数据库的主要优点是:
- 查询只使用许多列其中的少数列。
- 聚合对大量数据的查询。
- 按列压缩。
下面是构建报表时传统的面向行系统和列存储数据库之间的区别:
**传统行存储**
![传统行存储](https://clickhouse.com/docs/en/images/row-oriented.gif)
**列存储**
![列存储](https://clickhouse.com/docs/en/images/column-oriented.gif)
列存储数据库是分析应用程序的首选因为它允许在一个表中有许多列以防万一但不会在读取查询执行时为未使用的列付出代价。面向列的数据库是为大数据处理而设计的因为和数据仓库一样它们通常使用分布式的低成本硬件集群来提高吞吐量。ClickHouse结合了[分布式](../../engines/table-engines/special/distributed.md)和[复制式](../../engines/table-engines/mergetree-family/replication.md)两类表。

View File

@ -1 +0,0 @@
../../../en/faq/general/dbms-naming.md

View File

@ -0,0 +1,17 @@
---
title: "\u201CClickHouse\u201D 有什么含义?"
toc_hidden: true
toc_priority: 10
---
# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上ClickHouse本应该保存人们在互联网上的所有点击记录现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
这个由两部分组成的意思有两个结果:
- 唯一正确的写“Click**H**ouse”的方式是用大写H。
- 如果需要缩写,请使用“**CH**”。由于一些历史原因缩写CK在中国也很流行主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
!!! info "有趣的事实"
多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。

View File

@ -1 +0,0 @@
../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md

View File

@ -0,0 +1,17 @@
---
title: 我如何为ClickHouse贡献代码?
toc_hidden: true
toc_priority: 120
---
# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse是一个开源项目，[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
如果你想对ClickHouse提出实质性的改变建议可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。

View File

@ -1 +0,0 @@
../../../en/faq/general/index.md

View File

@ -0,0 +1,27 @@
---
title: ClickHouse 有关常见问题
toc_hidden_folder: true
toc_priority: 1
toc_title: General
---
# ClickHouse 有关常见问题 {#general-questions}
常见问题:
- [什么是 ClickHouse?](../../index.md#what-is-clickhouse)
- [为何 ClickHouse 如此迅捷?](../../faq/general/why-clickhouse-is-so-fast.md)
- [谁在使用 ClickHouse?](../../faq/general/who-is-using-clickhouse.md)
- [“ClickHouse” 有什么含义?](../../faq/general/dbms-naming.md)
- [“Не тормозит” 有什么含义?](../../faq/general/ne-tormozit.md)
- [什么是 OLAP?](../../faq/general/olap.md)
- [什么是列存储数据库?](../../faq/general/columnar-database.md)
- [为何不使用 MapReduce等技术?](../../faq/general/mapreduce.md)
- [我如何为 ClickHouse贡献代码?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
!!! info "没找到您需要的内容?"
请查阅 [其他 F.A.Q. 类别](../../faq/index.md) 或者从左侧导航栏浏览其他文档
{## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##}

View File

@ -1 +0,0 @@
../../../en/faq/general/mapreduce.md

Some files were not shown because too many files have changed in this diff Show More