diff --git a/.clang-format b/.clang-format
index c8b9672dc7d..d8f273702c8 100644
--- a/.clang-format
+++ b/.clang-format
@@ -12,6 +12,7 @@ BraceWrapping:
AfterUnion: true
BeforeCatch: true
BeforeElse: true
+ BeforeLambdaBody: true
IndentBraces: false
BreakConstructorInitializersBeforeComma: false
Cpp11BracedListStyle: true
diff --git a/.clang-tidy b/.clang-tidy
index ecb8ac6dcbf..ddd0ee6d911 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -142,6 +142,7 @@ Checks: '-*,
clang-analyzer-cplusplus.PlacementNewChecker,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
+ clang-analyzer-cplusplus.Move,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 91b9ea5bf3d..5816a58081d 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -86,6 +86,7 @@ jobs:
StyleCheck:
needs: DockerHubPush
runs-on: [self-hosted, style-checker]
+ if: ${{ success() || failure() }}
steps:
- name: Set envs
run: |
@@ -93,6 +94,8 @@ jobs:
TEMP_PATH=${{ runner.temp }}/style_check
EOF
- name: Download changed images
+ # even if the artifact does not exist, e.g. on the `do not test` label or a failed Docker job
+ continue-on-error: true
uses: actions/download-artifact@v2
with:
name: changed_images
@@ -1062,6 +1065,41 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
+ FunctionalStatelessTestReleaseS3:
+ needs: [BuilderDebRelease]
+ runs-on: [self-hosted, func-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/stateless_s3_storage
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Stateless tests (release, s3 storage, actions)
+ REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
+ KILL_TIMEOUT=10800
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v2
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: Functional test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker kill "$(docker ps -q)" ||:
+ docker rm -f "$(docker ps -a -q)" ||:
+ sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
@@ -2841,6 +2879,7 @@ jobs:
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestReleaseDatabaseOrdinary
+ - FunctionalStatelessTestReleaseS3
- FunctionalStatefulTestAarch64
- FunctionalStatefulTestAsan
- FunctionalStatefulTestTsan
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
new file mode 100644
index 00000000000..a172947b2fc
--- /dev/null
+++ b/.github/workflows/nightly.yml
@@ -0,0 +1,73 @@
+name: NightlyBuilds
+
+env:
+ # Force the stdout and stderr streams to be unbuffered
+ PYTHONUNBUFFERED: 1
+
+"on":
+ schedule:
+ - cron: '13 3 * * *'
+
+jobs:
+ DockerHubPushAarch64:
+ runs-on: [self-hosted, style-checker-aarch64]
+ steps:
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: Images check
+ run: |
+ cd "$GITHUB_WORKSPACE/tests/ci"
+ python3 docker_images_check.py --suffix aarch64 --all
+ - name: Upload images files to artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: changed_images_aarch64
+ path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+ DockerHubPushAmd64:
+ runs-on: [self-hosted, style-checker]
+ steps:
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: Images check
+ run: |
+ cd "$GITHUB_WORKSPACE/tests/ci"
+ python3 docker_images_check.py --suffix amd64 --all
+ - name: Upload images files to artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: changed_images_amd64
+ path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+ DockerHubPush:
+ needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+ runs-on: [self-hosted, style-checker]
+ steps:
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: Download changed aarch64 images
+ uses: actions/download-artifact@v2
+ with:
+ name: changed_images_aarch64
+ path: ${{ runner.temp }}
+ - name: Download changed amd64 images
+ uses: actions/download-artifact@v2
+ with:
+ name: changed_images_amd64
+ path: ${{ runner.temp }}
+ - name: Images check
+ run: |
+ cd "$GITHUB_WORKSPACE/tests/ci"
+ python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
+ - name: Upload images files to artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: changed_images
+ path: ${{ runner.temp }}/changed_images.json
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index cd8517de8fe..960e24d693c 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -111,6 +111,7 @@ jobs:
StyleCheck:
needs: DockerHubPush
runs-on: [self-hosted, style-checker]
+ if: ${{ success() || failure() }}
steps:
- name: Set envs
run: |
@@ -118,6 +119,8 @@ jobs:
TEMP_PATH=${{ runner.temp }}/style_check
EOF
- name: Download changed images
+ # even if the artifact does not exist, e.g. on the `do not test` label or a failed Docker job
+ continue-on-error: true
uses: actions/download-artifact@v2
with:
name: changed_images
@@ -1212,6 +1215,41 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
+ FunctionalStatelessTestReleaseS3:
+ needs: [BuilderDebRelease]
+ runs-on: [self-hosted, func-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/stateless_s3_storage
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Stateless tests (release, s3 storage, actions)
+ REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
+ KILL_TIMEOUT=10800
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v2
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: Functional test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker kill "$(docker ps -q)" ||:
+ docker rm -f "$(docker ps -a -q)" ||:
+ sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
@@ -3034,6 +3072,7 @@ jobs:
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
+ - FunctionalStatelessTestReleaseS3
- StressTestDebug
- StressTestAsan
- StressTestTsan
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 77bc285196c..46e36c846d0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -22,7 +22,6 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v2
- name: Download packages and push to Artifactory
- env:
run: |
rm -rf "$TEMP_PATH" && mkdir -p "$REPO_COPY"
cp -r "$GITHUB_WORKSPACE" "$REPO_COPY"
diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml
new file mode 100644
index 00000000000..30b6bfb027e
--- /dev/null
+++ b/.github/workflows/tags_stable.yml
@@ -0,0 +1,38 @@
+name: TagsStableWorkflow
+# - Regenerates utils/list-versions/version_date.tsv on every new v*-stable or v*-lts tag
+# - Opens a pull request against master with the updated file
+
+on: # yamllint disable-line rule:truthy
+ push:
+ tags:
+ - 'v*-stable'
+ - 'v*-lts'
+
+
+jobs:
+ UpdateVersions:
+ runs-on: [self-hosted, style-checker]
+ steps:
+ - name: Get tag name
+ run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ with:
+ ref: master
+ - name: Generate versions
+ run: |
+ git fetch --tags
+ ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
+ - name: Create Pull Request
+ uses: peter-evans/create-pull-request@v3
+ with:
+ commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }}
+ branch: auto/${{ env.GITHUB_TAG }}
+ delete-branch: true
+ title: Update version_date.tsv after ${{ env.GITHUB_TAG }}
+ body: |
+ Update version_date.tsv after ${{ env.GITHUB_TAG }}
+
+ Changelog category (leave one):
+ - Not for changelog (changelog entry is not required)
diff --git a/.gitmodules b/.gitmodules
index ed023ab348b..91f4ddb2007 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -259,3 +259,6 @@
[submodule "contrib/azure"]
path = contrib/azure
url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git
+[submodule "contrib/minizip-ng"]
+ path = contrib/minizip-ng
+ url = https://github.com/zlib-ng/minizip-ng
diff --git a/.potato.yml b/.potato.yml
deleted file mode 100644
index 7cb87c58bd1..00000000000
--- a/.potato.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-# This is the configuration file with settings for Potato.
-# Potato is an internal Yandex technology that allows us to sync internal [Yandex.Tracker](https://yandex.com/tracker/) and GitHub.
-
-# For all PRs where documentation is needed, just add a 'pr-feature' label and we will include it into documentation sprints.
-
-# The project name.
-name: clickhouse
-# Object handlers defines which handlers we use.
-handlers:
- # The handler for creating an Yandex.Tracker issue.
- - name: issue-create
- params:
- triggers:
- # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
- github:pullRequest:labeled:
- data:
- # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
- queue: CLICKHOUSEDOCS
- # The issue title.
- summary: '[Potato] Pull Request #{{pullRequest.number}}'
- # The issue description.
- description: >
- {{pullRequest.description}}
-
- Ссылка на Pull Request: {{pullRequest.webUrl}}
- # The condition for creating the Yandex.Tracker issue.
- condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b11ea650dc7..f27e9cdbea4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,7 +67,7 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURC
message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
endif ()
-include (cmake/find/ccache.cmake)
+include (cmake/ccache.cmake)
# Take care to add prlimit in command line before ccache, or else ccache thinks that
# prlimit is compiler, and clang++ is its input file, and refuses to work with
@@ -182,7 +182,7 @@ if (COMPILER_CLANG)
if (HAS_USE_CTOR_HOMING)
# For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
- if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
+ if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
endif()
@@ -247,8 +247,6 @@ endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set(USE_DEBUG_HELPERS ON)
-else ()
- set(USE_DEBUG_HELPERS ON)
endif()
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
@@ -403,17 +401,6 @@ else ()
option(WERROR "Enable -Werror compiler option" ON)
endif ()
-if (WERROR)
- # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
- # Instead, adopt modern cmake usage requirement.
- target_compile_options(global-libs INTERFACE "-Werror")
-endif ()
-
-# Make this extra-checks for correct library dependencies.
-if (OS_LINUX AND NOT SANITIZE)
- target_link_options(global-libs INTERFACE "-Wl,--no-undefined")
-endif ()
-
# Increase stack size on Musl. We need big stack for our recursive-descend parser.
if (USE_MUSL)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152")
@@ -421,6 +408,7 @@ endif ()
include(cmake/dbms_glob_sources.cmake)
+add_library(global-group INTERFACE)
if (OS_LINUX OR OS_ANDROID)
include(cmake/linux/default_libs.cmake)
elseif (OS_DARWIN)
@@ -428,6 +416,18 @@ elseif (OS_DARWIN)
elseif (OS_FREEBSD)
include(cmake/freebsd/default_libs.cmake)
endif ()
+link_libraries(global-group)
+
+if (WERROR)
+ # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
+ # Instead, adopt modern cmake usage requirement.
+ target_compile_options(global-group INTERFACE "-Werror")
+endif ()
+
+# Add extra checks for correct library dependencies.
+if (OS_LINUX AND NOT SANITIZE)
+ target_link_options(global-group INTERFACE "-Wl,--no-undefined")
+endif ()
######################################
### Add targets below this comment ###
diff --git a/SECURITY.md b/SECURITY.md
index f002dd53ca9..ca3c8b439fd 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -22,9 +22,10 @@ The following versions of ClickHouse server are currently being supported with s
| 21.7 | :x: |
| 21.8 | ✅ |
| 21.9 | :x: |
-| 21.10 | ✅ |
+| 21.10 | :x: |
| 21.11 | ✅ |
| 21.12 | ✅ |
+| 22.1 | ✅ |
## Reporting a Vulnerability
diff --git a/base/base/LineReader.cpp b/base/base/LineReader.cpp
index 9491f957762..f4e741a54e7 100644
--- a/base/base/LineReader.cpp
+++ b/base/base/LineReader.cpp
@@ -2,7 +2,9 @@
#include
#include
+#include
+#include
#include
#include
#include
@@ -34,13 +36,37 @@ bool hasInputData()
return select(1, &fds, nullptr, nullptr, &timeout) == 1;
}
+struct NoCaseCompare
+{
+ bool operator()(const std::string & str1, const std::string & str2)
+ {
+ return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const char c1, const char c2)
+ {
+ return std::tolower(c1) < std::tolower(c2);
+ });
+ }
+};
+
+using Words = std::vector<std::string>;
+template <class Compare>
+void addNewWords(Words & to, const Words & from, Compare comp)
+{
+ size_t old_size = to.size();
+ size_t new_size = old_size + from.size();
+
+ to.reserve(new_size);
+ to.insert(to.end(), from.begin(), from.end());
+ auto middle = to.begin() + old_size;
+ std::inplace_merge(to.begin(), middle, to.end(), comp);
+
+ auto last_unique = std::unique(to.begin(), to.end());
+ to.erase(last_unique, to.end());
}
-std::optional LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const
-{
- if (!ready)
- return std::nullopt;
+}
+replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length)
+{
std::string_view last_word;
auto last_word_pos = prefix.find_last_of(word_break_characters);
@@ -48,21 +74,45 @@ std::optional LineReader::Suggest::getCompletio
last_word = prefix;
else
last_word = std::string_view(prefix).substr(last_word_pos + 1, std::string::npos);
-
/// last_word can be empty.
+ std::pair<Words::const_iterator, Words::const_iterator> range;
+
+ std::lock_guard lock(mutex);
+
/// Only perform case sensitive completion when the prefix string contains any uppercase characters
- if (std::none_of(prefix.begin(), prefix.end(), [&](auto c) { return c >= 'A' && c <= 'Z'; }))
- return std::equal_range(
+ if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast<wint_t>(x)); }))
+ range = std::equal_range(
words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
{
return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0;
});
else
- return std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
+ range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
{
return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0;
});
+
+ return replxx::Replxx::completions_t(range.first, range.second);
+}
+
+void LineReader::Suggest::addWords(Words && new_words)
+{
+ Words new_words_no_case = new_words;
+ if (!new_words.empty())
+ {
+ std::sort(new_words.begin(), new_words.end());
+ std::sort(new_words_no_case.begin(), new_words_no_case.end(), NoCaseCompare{});
+ }
+
+ {
+ std::lock_guard lock(mutex);
+ addNewWords(words, new_words, std::less{});
+ addNewWords(words_no_case, new_words_no_case, NoCaseCompare{});
+
+ assert(std::is_sorted(words.begin(), words.end()));
+ assert(std::is_sorted(words_no_case.begin(), words_no_case.end(), NoCaseCompare{}));
+ }
}
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
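Note (editor's illustration, not part of the patch): the new `Suggest::addWords` path above keeps two sorted word lists — one case-sensitive, one ordered by `NoCaseCompare` — and folds every incoming batch in with `std::inplace_merge` plus `std::unique`. A minimal standalone sketch of that merge step; the names `mergeNewWords` and `NoCaseLess` are illustrative, not the patch's exact helpers:

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

using Words = std::vector<std::string>;

struct NoCaseLess
{
    bool operator()(const std::string & a, const std::string & b) const
    {
        return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(),
            [](unsigned char x, unsigned char y) { return std::tolower(x) < std::tolower(y); });
    }
};

template <class Compare>
void mergeNewWords(Words & to, const Words & from, Compare cmp)
{
    size_t old_size = to.size();
    to.insert(to.end(), from.begin(), from.end());
    std::inplace_merge(to.begin(), to.begin() + old_size, to.end(), cmp); /// both halves are already sorted
    to.erase(std::unique(to.begin(), to.end()), to.end());                /// drop exact duplicates
}

int main()
{
    Words words = {"CREATE", "SELECT"};          /// already sorted under NoCaseLess
    Words batch = {"select", "insert"};          /// e.g. completions that arrived later from the server
    std::sort(batch.begin(), batch.end(), NoCaseLess{});
    mergeNewWords(words, batch, NoCaseLess{});
    for (const auto & w : words)
        std::cout << w << '\n';                  /// CREATE insert SELECT select
}
```

Keeping both lists permanently sorted is what lets `getCompletions` answer a prefix query with a single `std::equal_range` under the mutex.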
diff --git a/base/base/LineReader.h b/base/base/LineReader.h
index 12a856e2051..33daae49974 100644
--- a/base/base/LineReader.h
+++ b/base/base/LineReader.h
@@ -1,10 +1,12 @@
#pragma once
-#include
-
+#include
#include
#include
#include
+#include
+
+#include
class LineReader
{
@@ -12,14 +14,16 @@ public:
struct Suggest
{
using Words = std::vector<std::string>;
- using WordsRange = std::pair;
+ /// Get vector for the matched range of words if any.
+ replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length);
+ void addWords(Words && new_words);
+
+ private:
Words words;
Words words_no_case;
- std::atomic ready{false};
- /// Get iterators for the matched range of words if any.
- std::optional getCompletions(const String & prefix, size_t prefix_length) const;
+ std::mutex mutex;
};
using Patterns = std::vector;
diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp
index 5d99da99c8c..9ea53bb132b 100644
--- a/base/base/ReplxxLineReader.cpp
+++ b/base/base/ReplxxLineReader.cpp
@@ -25,13 +25,6 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
}
-/// Check if string ends with given character after skipping whitespaces.
-bool ends_with(const std::string_view & s, const std::string_view & p)
-{
- auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }));
- return ss.ends_with(p);
-}
-
std::string getEditor()
{
const char * editor = std::getenv("EDITOR");
@@ -132,8 +125,14 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
}
+static bool replxx_last_is_delimiter = false;
+void ReplxxLineReader::setLastIsDelimiter(bool flag)
+{
+ replxx_last_is_delimiter = flag;
+}
+
ReplxxLineReader::ReplxxLineReader(
- const Suggest & suggest,
+ Suggest & suggest,
const String & history_file_path_,
bool multiline_,
Patterns extenders_,
@@ -179,14 +178,13 @@ ReplxxLineReader::ReplxxLineReader(
auto callback = [&suggest] (const String & context, size_t context_size)
{
- if (auto range = suggest.getCompletions(context, context_size))
- return Replxx::completions_t(range->first, range->second);
- return Replxx::completions_t();
+ return suggest.getCompletions(context, context_size);
};
rx.set_completion_callback(callback);
rx.set_complete_on_empty(false);
rx.set_word_break_characters(word_break_characters);
+ rx.set_ignore_case(true);
if (highlighter)
rx.set_highlighter_callback(highlighter);
@@ -198,21 +196,11 @@ ReplxxLineReader::ReplxxLineReader(
auto commit_action = [this](char32_t code)
{
- std::string_view str = rx.get_state().text();
-
- /// Always commit line when we see extender at the end. It will start a new prompt.
- for (const auto * extender : extenders)
- if (ends_with(str, extender))
- return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
-
- /// If we see an delimiter at the end, commit right away.
- for (const auto * delimiter : delimiters)
- if (ends_with(str, delimiter))
- return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
-
/// If we allow multiline and there is already something in the input, start a newline.
- if (multiline && !input.empty())
+ /// NOTE: Lexer is only available if we use highlighter.
+ if (highlighter && multiline && !replxx_last_is_delimiter)
return rx.invoke(Replxx::ACTION::NEW_LINE, code);
+ replxx_last_is_delimiter = false;
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
/// bind C-j to ENTER action.
diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h
index d4cc7de1e7a..b9ec214d02c 100644
--- a/base/base/ReplxxLineReader.h
+++ b/base/base/ReplxxLineReader.h
@@ -9,7 +9,7 @@ class ReplxxLineReader : public LineReader
{
public:
ReplxxLineReader(
- const Suggest & suggest,
+ Suggest & suggest,
const String & history_file_path,
bool multiline,
Patterns extenders_,
@@ -19,6 +19,9 @@ public:
void enableBracketedPaste() override;
+ /// If highlight is on, we will set a flag to denote whether the last token is a delimiter.
+ /// This is useful to determine the behavior of the Enter key when multiline is enabled.
+ static void setLastIsDelimiter(bool flag);
private:
InputStatus readOneLine(const String & prompt) override;
void addToHistory(const String & line) override;
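Note (hypothetical wiring, not from the patch): `setLastIsDelimiter` is meant to be fed from the highlighter callback, which already lexes the input, and `commit_action` in ReplxxLineReader.cpp then chooses between COMMIT_LINE and NEW_LINE on Enter. The stand-in below skips the real lexer and only checks for a trailing `;`, purely to show the flow:

```cpp
#include <iostream>
#include <string>

/// Stand-in for the static flag/setter pair declared in ReplxxLineReader.
static bool last_is_delimiter = false;
void setLastIsDelimiter(bool flag) { last_is_delimiter = flag; }

/// A toy "highlighter": the real one would run the ClickHouse lexer and colour
/// tokens; here we only record whether the statement looks finished.
void highlight(const std::string & query)
{
    auto last = query.find_last_not_of(" \t\n\r");
    setLastIsDelimiter(last != std::string::npos && query[last] == ';');
}

int main()
{
    highlight("SELECT 1");
    std::cout << "Enter commits: " << std::boolalpha << last_is_delimiter << '\n'; /// false -> NEW_LINE
    highlight("SELECT 1;");
    std::cout << "Enter commits: " << std::boolalpha << last_is_delimiter << '\n'; /// true -> COMMIT_LINE
}
```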
diff --git a/base/base/logger_useful.h b/base/base/logger_useful.h
index 1237c6bd47c..ad7d6583f5e 100644
--- a/base/base/logger_useful.h
+++ b/base/base/logger_useful.h
@@ -12,6 +12,8 @@ namespace
{
template constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); }
template constexpr auto firstArg(T && x, Ts &&...) { return std::forward(x); }
+ /// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor
+ template <typename T, typename... Ts> constexpr auto firstArg(fmt::basic_runtime<T> && data, Ts &&...) { return data.str.data(); }
}
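Note (illustrative sketch, assuming the bundled fmt is >= 8): the new `firstArg` overload above, and the `fmt::runtime(...)` wrappers added to the `LOG_FATAL` calls in base/daemon/BaseDaemon.cpp later in this diff, exist because newer fmtlib distinguishes compile-time format strings from ones only known at run time. A message assembled at run time has to be wrapped explicitly:

```cpp
#include <fmt/format.h>
#include <iostream>
#include <string>

int main()
{
    /// Literal format strings are checked against their arguments at compile time.
    std::cout << fmt::format("(from thread {}) Received signal {}", 42, "SIGSEGV") << '\n';

    /// A message that is only known at run time (e.g. an error text) must opt out of
    /// that check via fmt::runtime(); this is what LOG_FATAL(log, fmt::runtime(msg)) does.
    std::string msg = "Sanitizer trap.";
    std::cout << fmt::format(fmt::runtime(msg)) << '\n';
}
```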
diff --git a/base/base/sort.h b/base/base/sort.h
index 1f12cc62218..589469fffaa 100644
--- a/base/base/sort.h
+++ b/base/base/sort.h
@@ -1,26 +1,133 @@
#pragma once
+#include
+
+#ifndef NDEBUG
+
+#include
+#include
+
+/** Same as libcxx std::__debug_less. Just without dependency on private part of standard library.
+ * Check that Comparator induces a strict weak ordering.
+ */
+template <typename Comparator>
+class DebugLessComparator
+{
+public:
+ constexpr DebugLessComparator(Comparator & cmp_)
+ : cmp(cmp_)
+ {}
+
+ template <typename LhsType, typename RhsType>
+ constexpr bool operator()(const LhsType & lhs, const RhsType & rhs)
+ {
+ bool lhs_less_than_rhs = cmp(lhs, rhs);
+ if (lhs_less_than_rhs)
+ assert(!cmp(rhs, lhs));
+
+ return lhs_less_than_rhs;
+ }
+
+ template <typename LhsType, typename RhsType>
+ constexpr bool operator()(LhsType & lhs, RhsType & rhs)
+ {
+ bool lhs_less_than_rhs = cmp(lhs, rhs);
+ if (lhs_less_than_rhs)
+ assert(!cmp(rhs, lhs));
+
+ return lhs_less_than_rhs;
+ }
+
+private:
+ Comparator & cmp;
+};
+
+template <typename Comparator>
+using ComparatorWrapper = DebugLessComparator<Comparator>;
+
+template <typename RandomIt>
+void shuffle(RandomIt first, RandomIt last)
+{
+ static thread_local pcg64 rng(getThreadId());
+ std::shuffle(first, last, rng);
+}
+
+#else
+
+template <typename Comparator>
+using ComparatorWrapper = Comparator;
+
+#endif
+
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#include
-template
+template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
- ::miniselect::floyd_rivest_select(first, nth, last);
+ using value_type = typename std::iterator_traits<RandomIt>::value_type;
+ using comparator = std::less<value_type>;
+
+ comparator compare;
+ ComparatorWrapper<comparator> compare_wrapper = compare;
+
+#ifndef NDEBUG
+ ::shuffle(first, last);
+#endif
+
+ ::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper);
+
+#ifndef NDEBUG
+ ::shuffle(first, nth);
+
+ if (nth != last)
+ ::shuffle(nth + 1, last);
+#endif
}
-template
-void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
-{
- ::miniselect::floyd_rivest_partial_sort(first, middle, last);
-}
-
-template
+template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{
- ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare);
+#ifndef NDEBUG
+ ::shuffle(first, last);
+#endif
+
+ ComparatorWrapper<Compare> compare_wrapper = compare;
+ ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare_wrapper);
+
+#ifndef NDEBUG
+ ::shuffle(middle, last);
+#endif
+}
+
+template <typename RandomIt>
+void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
+{
+ using value_type = typename std::iterator_traits<RandomIt>::value_type;
+ using comparator = std::less<value_type>;
+
+ ::partial_sort(first, middle, last, comparator());
}
#pragma GCC diagnostic pop
+
+template <typename RandomIt, typename Compare>
+void sort(RandomIt first, RandomIt last, Compare compare)
+{
+#ifndef NDEBUG
+ ::shuffle(first, last);
+#endif
+
+ ComparatorWrapper<Compare> compare_wrapper = compare;
+ ::pdqsort(first, last, compare_wrapper);
+}
+
+template <typename RandomIt>
+void sort(RandomIt first, RandomIt last)
+{
+ using value_type = typename std::iterator_traits<RandomIt>::value_type;
+ using comparator = std::less<value_type>;
+ ::sort(first, last, comparator());
+}
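Note (standalone illustration, not part of the patch): `DebugLessComparator` mirrors libcxx's `std::__debug_less` — in debug builds every comparison also evaluates the reversed order, so a comparator that is not a strict weak ordering trips an assert, and the debug-only `::shuffle` calls randomize the unsorted parts of the range so callers cannot silently depend on one particular output order. A reduced sketch of the wrapper idea, with illustrative names:

```cpp
#include <cassert>

template <typename Comparator>
class CheckedLess
{
public:
    explicit CheckedLess(Comparator cmp_) : cmp(cmp_) {}

    template <typename L, typename R>
    bool operator()(const L & lhs, const R & rhs)
    {
        bool less = cmp(lhs, rhs);
        if (less)
            assert(!cmp(rhs, lhs)); /// a strict weak ordering can never hold in both directions
        return less;
    }

private:
    Comparator cmp;
};

int main()
{
    auto broken = [](int a, int b) { return a <= b; }; /// <= is NOT a strict weak ordering
    CheckedLess checked{broken};
    (void)checked(1, 1); /// fires the assert in a debug build: 1 <= 1 holds both ways
}
```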
diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h
index ec146fd5821..ed2c2972cfe 100644
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@@ -12,6 +12,18 @@
#include
#include
+#include
+#include
+
+/// Use same extended double for all platforms
+#if (LDBL_MANT_DIG == 64)
+#define CONSTEXPR_FROM_DOUBLE constexpr
+using FromDoubleIntermediateType = long double;
+#else
+/// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
+#define CONSTEXPR_FROM_DOUBLE
+using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended;
+#endif
namespace wide
{
@@ -265,12 +277,23 @@ struct integer::_impl
constexpr static void set_multiplier(integer & self, T t) noexcept
{
constexpr uint64_t max_int = std::numeric_limits::max();
-
+ static_assert(std::is_same_v<T, double> || std::is_same_v<T, FromDoubleIntermediateType>);
/// Implementation specific behaviour on overflow (if we don't check here, stack overflow will be triggered in bigint_cast).
- if (!std::isfinite(t))
+ if constexpr (std::is_same_v<T, double>)
{
- self = 0;
- return;
+ if (!std::isfinite(t))
+ {
+ self = 0;
+ return;
+ }
+ }
+ else
+ {
+ if (!boost::math::isfinite(t))
+ {
+ self = 0;
+ return;
+ }
}
const T alpha = t / static_cast(max_int);
@@ -278,13 +301,13 @@ struct integer::_impl
if (alpha <= static_cast(max_int))
self = static_cast(alpha);
else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
- set_multiplier(self, alpha);
+ set_multiplier(self, static_cast<double>(alpha));
self *= max_int;
self += static_cast(t - floor(alpha) * static_cast(max_int)); // += b_i
}
- constexpr static void wide_integer_from_builtin(integer & self, double rhs) noexcept
+ CONSTEXPR_FROM_DOUBLE static void wide_integer_from_builtin(integer & self, double rhs) noexcept
{
constexpr int64_t max_int = std::numeric_limits::max();
constexpr int64_t min_int = std::numeric_limits::lowest();
@@ -294,24 +317,17 @@ struct integer::_impl
/// the result may not fit in 64 bits.
/// The example of such a number is 9.22337e+18.
/// As to_Integral does a static_cast to int64_t, it may result in UB.
- /// The necessary check here is that long double has enough significant (mantissa) bits to store the
+ /// The necessary check here is that FromDoubleIntermediateType has enough significant (mantissa) bits to store the
/// int64_t max value precisely.
- // TODO Be compatible with Apple aarch64
-#if not (defined(__APPLE__) && defined(__aarch64__))
- static_assert(LDBL_MANT_DIG >= 64,
- "On your system long double has less than 64 precision bits, "
- "which may result in UB when initializing double from int64_t");
-#endif
-
- if (rhs > static_cast(min_int) && rhs < static_cast(max_int))
+ if (rhs > static_cast<FromDoubleIntermediateType>(min_int) && rhs < static_cast<FromDoubleIntermediateType>(max_int))
{
self = static_cast(rhs);
return;
}
- const long double rhs_long_double = (static_cast(rhs) < 0)
- ? -static_cast(rhs)
+ const FromDoubleIntermediateType rhs_long_double = (static_cast<FromDoubleIntermediateType>(rhs) < 0)
+ ? -static_cast<FromDoubleIntermediateType>(rhs)
: rhs;
set_multiplier(self, rhs_long_double);
diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index f3026d7c87a..311349a2ba7 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -79,18 +79,14 @@ static void call_default_signal_handler(int sig)
raise(sig);
}
-static constexpr size_t max_query_id_size = 127;
-
static const size_t signal_pipe_buf_size =
sizeof(int)
+ sizeof(siginfo_t)
- + sizeof(ucontext_t)
+ + sizeof(ucontext_t*)
+ sizeof(StackTrace)
+ sizeof(UInt32)
- + max_query_id_size + 1 /// query_id + varint encoded length
+ sizeof(void*);
-
using signal_function = void(int, siginfo_t*, void*);
static void writeSignalIDtoSignalPipe(int sig)
@@ -129,18 +125,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
- const ucontext_t signal_context = *reinterpret_cast(context);
- const StackTrace stack_trace(signal_context);
-
- StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe.
- query_id.size = std::min(query_id.size, max_query_id_size);
+ const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
+ const StackTrace stack_trace(*signal_context);
DB::writeBinary(sig, out);
DB::writePODBinary(*info, out);
DB::writePODBinary(signal_context, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
- DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
@@ -184,6 +176,8 @@ public:
void run() override
{
+ static_assert(PIPE_BUF >= 512);
+ static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
char buf[signal_pipe_buf_size];
DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
@@ -227,10 +221,9 @@ public:
else
{
siginfo_t info{};
- ucontext_t context{};
+ ucontext_t * context{};
StackTrace stack_trace(NoCapture{});
UInt32 thread_num{};
- std::string query_id;
DB::ThreadStatus * thread_ptr{};
if (sig != SanitizerTrap)
@@ -241,12 +234,11 @@ public:
DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in);
- DB::readBinary(query_id, in);
DB::readPODBinary(thread_ptr, in);
/// This allows to receive more signals if failure happens inside onFault function.
/// Example: segfault while symbolizing stack trace.
- std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
+ std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, thread_ptr); }).detach();
}
}
}
@@ -279,18 +271,27 @@ private:
void onFault(
int sig,
const siginfo_t & info,
- const ucontext_t & context,
+ ucontext_t * context,
const StackTrace & stack_trace,
UInt32 thread_num,
- const std::string & query_id,
DB::ThreadStatus * thread_ptr) const
{
DB::ThreadStatus thread_status;
+ String query_id;
+ String query;
+
/// Send logs from this thread to client if possible.
/// It will allow client to see failure messages directly.
if (thread_ptr)
{
+ query_id = thread_ptr->getQueryId().toString();
+
+ if (auto thread_group = thread_ptr->getThreadGroup())
+ {
+ query = thread_group->query;
+ }
+
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
@@ -305,19 +306,19 @@ private:
}
else
{
- LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
+ LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
- thread_num, query_id, strsignal(sig), sig);
+ thread_num, query_id, query, strsignal(sig), sig);
}
String error_message;
if (sig != SanitizerTrap)
- error_message = signalToErrorMessage(sig, info, context);
+ error_message = signalToErrorMessage(sig, info, *context);
else
error_message = "Sanitizer trap.";
- LOG_FATAL(log, error_message);
+ LOG_FATAL(log, fmt::runtime(error_message));
if (stack_trace.getSize())
{
@@ -330,11 +331,11 @@ private:
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
- LOG_FATAL(log, bare_stacktrace.str());
+ LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
}
/// Write symbolized stack trace line by line for better grep-ability.
- stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
+ stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, fmt::runtime(s)); });
#if defined(OS_LINUX)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
@@ -389,20 +390,16 @@ static void sanitizerDeathCallback()
const StackTrace stack_trace;
- StringRef query_id = DB::CurrentThread::getQueryId();
- query_id.size = std::min(query_id.size, max_query_id_size);
-
int sig = SignalListener::SanitizerTrap;
DB::writeBinary(sig, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
- DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
/// The time that is usually enough for separate thread to print info into log.
- sleepForSeconds(10);
+ sleepForSeconds(20);
}
#endif
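Note (illustrative sketch, not part of the patch): the new `static_assert`s above rely on the POSIX guarantee that a single `write()` of at most `PIPE_BUF` bytes to a pipe is atomic, which is also why the variable-length query_id was dropped from the signal message in favour of fixed-size POD fields. A minimal self-contained example of the pattern; `CrashMessage` is a made-up layout, not the real one:

```cpp
#include <limits.h>   // PIPE_BUF
#include <signal.h>   // siginfo_t
#include <unistd.h>
#include <cstdio>

struct CrashMessage
{
    int sig;
    siginfo_t info;
    void * context;
};

/// Writes of up to PIPE_BUF bytes are atomic, so concurrent signal handlers
/// can never interleave their messages inside the pipe.
static_assert(sizeof(CrashMessage) <= PIPE_BUF, "must fit into one atomic pipe write");

int main()
{
    int fds[2];
    if (pipe(fds) != 0)
        return 1;

    CrashMessage msg{SIGSEGV, {}, nullptr};
    (void)write(fds[1], &msg, sizeof(msg));

    CrashMessage received{};
    (void)read(fds[0], &received, sizeof(received));
    std::printf("received signal %d\n", received.sig);
}
```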
diff --git a/cmake/find/ccache.cmake b/cmake/ccache.cmake
similarity index 100%
rename from cmake/find/ccache.cmake
rename to cmake/ccache.cmake
diff --git a/cmake/find/cxx.cmake b/cmake/cxx.cmake
similarity index 100%
rename from cmake/find/cxx.cmake
rename to cmake/cxx.cmake
diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake
index a6ee800d59b..1f92663a4b9 100644
--- a/cmake/darwin/default_libs.cmake
+++ b/cmake/darwin/default_libs.cmake
@@ -22,16 +22,12 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
-include (cmake/find/cxx.cmake)
-
-add_library(global-group INTERFACE)
+include (cmake/cxx.cmake)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
)
-link_libraries(global-group)
-
# FIXME: remove when all contribs will get custom cmake lists
install(
TARGETS global-group global-libs
diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake
index a5847c95387..65d5f0511d9 100644
--- a/cmake/freebsd/default_libs.cmake
+++ b/cmake/freebsd/default_libs.cmake
@@ -22,17 +22,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
-include (cmake/find/unwind.cmake)
-include (cmake/find/cxx.cmake)
-
-add_library(global-group INTERFACE)
+include (cmake/unwind.cmake)
+include (cmake/cxx.cmake)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
)
-link_libraries(global-group)
-
# FIXME: remove when all contribs will get custom cmake lists
install(
TARGETS global-group global-libs
diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake
index 426ae482ea3..21bead7020c 100644
--- a/cmake/linux/default_libs.cmake
+++ b/cmake/linux/default_libs.cmake
@@ -42,18 +42,15 @@ if (NOT OS_ANDROID)
add_subdirectory(base/harmful)
endif ()
-include (cmake/find/unwind.cmake)
-include (cmake/find/cxx.cmake)
+include (cmake/unwind.cmake)
+include (cmake/cxx.cmake)
-add_library(global-group INTERFACE)
target_link_libraries(global-group INTERFACE
-Wl,--start-group
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
-Wl,--end-group
)
-link_libraries(global-group)
-
# FIXME: remove when all contribs will get custom cmake lists
install(
TARGETS global-group global-libs
diff --git a/cmake/find/unwind.cmake b/cmake/unwind.cmake
similarity index 100%
rename from cmake/find/unwind.cmake
rename to cmake/unwind.cmake
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 6172f231b6e..9cf307c473e 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -78,6 +78,7 @@ add_contrib (croaring-cmake croaring)
add_contrib (zstd-cmake zstd)
add_contrib (zlib-ng-cmake zlib-ng)
add_contrib (bzip2-cmake bzip2)
+add_contrib (minizip-ng-cmake minizip-ng)
add_contrib (snappy-cmake snappy)
add_contrib (rocksdb-cmake rocksdb)
add_contrib (thrift-cmake thrift)
diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt
index 54bfead6da7..2e4059efc17 100644
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@@ -29,12 +29,6 @@ if (OS_FREEBSD)
message (FATAL_ERROR "Using internal parquet library on FreeBSD is not supported")
endif()
-if(USE_STATIC_LIBRARIES)
- set(FLATBUFFERS_LIBRARY flatbuffers)
-else()
- set(FLATBUFFERS_LIBRARY flatbuffers_shared)
-endif()
-
set (CMAKE_CXX_STANDARD 17)
set(ARROW_VERSION "6.0.1")
@@ -95,9 +89,16 @@ set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
-message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}")
+add_library(_flatbuffers INTERFACE)
+if(USE_STATIC_LIBRARIES)
+ target_link_libraries(_flatbuffers INTERFACE flatbuffers)
+else()
+ target_link_libraries(_flatbuffers INTERFACE flatbuffers_shared)
+endif()
+target_include_directories(_flatbuffers INTERFACE ${FLATBUFFERS_INCLUDE_DIR})
# === hdfs
+# NOTE: cannot use ch_contrib::hdfs since its INCLUDE_DIRECTORIES does not include the trailing "hdfs/"
set(HDFS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/")
# arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features.
@@ -123,8 +124,6 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
- "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
- "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
@@ -151,6 +150,22 @@ set(ORC_SRCS
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
)
+add_library(_orc ${ORC_SRCS})
+target_link_libraries(_orc PRIVATE
+ ch_contrib::protobuf
+ ch_contrib::lz4
+ ch_contrib::snappy
+ ch_contrib::zlib
+ ch_contrib::zstd)
+target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR})
+target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_BUILD_INCLUDE_DIR})
+target_include_directories(_orc SYSTEM PRIVATE
+ ${ORC_SOURCE_SRC_DIR}
+ ${ORC_SOURCE_WRAP_DIR}
+ ${ORC_BUILD_SRC_DIR}
+ ${ORC_ADDITION_SOURCE_DIR}
+ ${ARROW_SRC_DIR})
+
# === arrow
@@ -336,7 +351,8 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/ipc/reader.cc"
"${LIBRARY_DIR}/ipc/writer.cc"
- ${ORC_SRCS}
+ "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
+ "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
)
add_definitions(-DARROW_WITH_LZ4)
@@ -356,30 +372,27 @@ endif ()
add_library(_arrow ${ARROW_SRCS})
-# Arrow dependencies
-add_dependencies(_arrow ${FLATBUFFERS_LIBRARY})
+target_link_libraries(_arrow PRIVATE
+ boost::filesystem
-target_link_libraries(_arrow PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)
+ _flatbuffers
+
+ ch_contrib::double_conversion
+
+ ch_contrib::lz4
+ ch_contrib::snappy
+ ch_contrib::zlib
+ ch_contrib::zstd
+ ch_contrib::zstd
+)
+target_link_libraries(_arrow PUBLIC _orc)
add_dependencies(_arrow protoc)
target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ARROW_SRC_DIR})
target_include_directories(_arrow SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/cpp/src")
-target_link_libraries(_arrow PRIVATE ch_contrib::double_conversion)
-target_link_libraries(_arrow PRIVATE ch_contrib::protobuf)
-target_link_libraries(_arrow PRIVATE ch_contrib::lz4)
-target_link_libraries(_arrow PRIVATE ch_contrib::snappy)
-target_link_libraries(_arrow PRIVATE ch_contrib::zlib)
-target_link_libraries(_arrow PRIVATE ch_contrib::zstd)
-target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR})
-target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ORC_BUILD_INCLUDE_DIR})
-target_include_directories(_arrow SYSTEM PRIVATE ${ORC_SOURCE_SRC_DIR})
-target_include_directories(_arrow SYSTEM PRIVATE ${ORC_SOURCE_WRAP_DIR})
-target_include_directories(_arrow SYSTEM PRIVATE ${ORC_BUILD_SRC_DIR})
-target_include_directories(_arrow SYSTEM PRIVATE ${ORC_ADDITION_SOURCE_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${ARROW_SRC_DIR})
-target_include_directories(_arrow SYSTEM PRIVATE ${FLATBUFFERS_INCLUDE_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${HDFS_INCLUDE_DIR})
# === parquet
diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt
index 416dca6f2bc..81c1fab3882 100644
--- a/contrib/cassandra-cmake/CMakeLists.txt
+++ b/contrib/cassandra-cmake/CMakeLists.txt
@@ -56,19 +56,11 @@ list(APPEND SOURCES ${CASS_SRC_DIR}/atomic/atomic_std.hpp)
add_library(_curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp)
add_library(_hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp)
add_library(_http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c)
-add_library(_minizip OBJECT
- ${CASS_SRC_DIR}/third_party/minizip/ioapi.c
- ${CASS_SRC_DIR}/third_party/minizip/zip.c
- ${CASS_SRC_DIR}/third_party/minizip/unzip.c)
-
-target_link_libraries(_minizip ch_contrib::zlib)
-target_compile_definitions(_minizip PRIVATE "-Dz_crc_t=unsigned long")
list(APPEND INCLUDE_DIRS
${CASS_SRC_DIR}/third_party/curl
${CASS_SRC_DIR}/third_party/hdr_histogram
${CASS_SRC_DIR}/third_party/http-parser
- ${CASS_SRC_DIR}/third_party/minizip
${CASS_SRC_DIR}/third_party/mt19937_64
${CASS_SRC_DIR}/third_party/rapidjson/rapidjson
${CASS_SRC_DIR}/third_party/sparsehash/src)
@@ -123,10 +115,9 @@ add_library(_cassandra
${SOURCES}
$<TARGET_OBJECTS:_curl_hostcheck>
$<TARGET_OBJECTS:_hdr_histogram>
- $<TARGET_OBJECTS:_http-parser>
- $<TARGET_OBJECTS:_minizip>)
+ $<TARGET_OBJECTS:_http-parser>)
-target_link_libraries(_cassandra ch_contrib::zlib)
+target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip)
target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS})
target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})
target_compile_definitions(_cassandra PRIVATE CASS_BUILDING)
diff --git a/contrib/fmtlib b/contrib/fmtlib
index c108ee1d590..b6f4ceaed0a 160000
--- a/contrib/fmtlib
+++ b/contrib/fmtlib
@@ -1 +1 @@
-Subproject commit c108ee1d590089ccf642fc85652b845924067af2
+Subproject commit b6f4ceaed0a0a24ccf575fab6c56dd50ccf6f1a9
diff --git a/contrib/fmtlib-cmake/CMakeLists.txt b/contrib/fmtlib-cmake/CMakeLists.txt
index d8cb721b9ba..fecec5f3e43 100644
--- a/contrib/fmtlib-cmake/CMakeLists.txt
+++ b/contrib/fmtlib-cmake/CMakeLists.txt
@@ -1,7 +1,10 @@
set (SRCS
+ # NOTE: do not build module for now:
+ # ../fmtlib/src/fmt.cc
../fmtlib/src/format.cc
../fmtlib/src/os.cc
+ ../fmtlib/include/fmt/args.h
../fmtlib/include/fmt/chrono.h
../fmtlib/include/fmt/color.h
../fmtlib/include/fmt/compile.h
@@ -11,9 +14,9 @@ set (SRCS
../fmtlib/include/fmt/locale.h
../fmtlib/include/fmt/os.h
../fmtlib/include/fmt/ostream.h
- ../fmtlib/include/fmt/posix.h
../fmtlib/include/fmt/printf.h
../fmtlib/include/fmt/ranges.h
+ ../fmtlib/include/fmt/xchar.h
)
add_library(_fmt ${SRCS})
diff --git a/contrib/minizip-ng b/contrib/minizip-ng
new file mode 160000
index 00000000000..6cffc951851
--- /dev/null
+++ b/contrib/minizip-ng
@@ -0,0 +1 @@
+Subproject commit 6cffc951851620e0fac1993be75e4713c334de03
diff --git a/contrib/minizip-ng-cmake/CMakeLists.txt b/contrib/minizip-ng-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..4aabbd3c9fb
--- /dev/null
+++ b/contrib/minizip-ng-cmake/CMakeLists.txt
@@ -0,0 +1,168 @@
+option(ENABLE_MINIZIP "Enable minizip-ng, the zip manipulation library" ${ENABLE_LIBRARIES})
+if (NOT ENABLE_MINIZIP)
+ message (STATUS "minizip-ng disabled")
+ return()
+endif()
+
+set(_MINIZIP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/minizip-ng")
+
+# Initial source files
+set(MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_crypt.c
+ ${_MINIZIP_SOURCE_DIR}/mz_os.c
+ ${_MINIZIP_SOURCE_DIR}/mz_strm.c
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.c
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.c
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_split.c
+ ${_MINIZIP_SOURCE_DIR}/mz_zip.c
+ ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.c)
+
+# Initial header files
+set(MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz.h
+ ${_MINIZIP_SOURCE_DIR}/mz_os.h
+ ${_MINIZIP_SOURCE_DIR}/mz_crypt.h
+ ${_MINIZIP_SOURCE_DIR}/mz_strm.h
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.h
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.h
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_split.h
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_os.h
+ ${_MINIZIP_SOURCE_DIR}/mz_zip.h
+ ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.h)
+
+set(MINIZIP_INC ${_MINIZIP_SOURCE_DIR})
+
+set(MINIZIP_DEF)
+set(MINIZIP_PUBLIC_DEF)
+set(MINIZIP_LIB)
+
+# Check if zlib is present
+set(MZ_ZLIB ON)
+if(MZ_ZLIB)
+ # Use zlib from ClickHouse contrib
+ list(APPEND MINIZIP_LIB ch_contrib::zlib)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.h)
+
+ list(APPEND MINIZIP_DEF "-DHAVE_ZLIB")
+endif()
+
+# Check if bzip2 is present
+set(MZ_BZIP2 ${ENABLE_BZIP2})
+if(MZ_BZIP2)
+ # Use bzip2 from ClickHouse contrib
+ list(APPEND MINIZIP_LIB ch_contrib::bzip2)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.h)
+
+ list(APPEND MINIZIP_DEF "-DHAVE_BZIP2")
+endif()
+
+# Check if liblzma is present
+set(MZ_LZMA ON)
+if(MZ_LZMA)
+ # Use liblzma from ClickHouse contrib
+ list(APPEND MINIZIP_LIB ch_contrib::xz)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.h)
+
+ list(APPEND MINIZIP_DEF "-DHAVE_LZMA")
+endif()
+
+# Check if zstd is present
+set(MZ_ZSTD ON)
+if(MZ_ZSTD)
+ # Use zstd from ClickHouse contrib
+ list(APPEND MINIZIP_LIB ch_contrib::zstd)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.h)
+
+ list(APPEND MINIZIP_DEF "-DHAVE_ZSTD")
+endif()
+
+if(NOT MZ_ZLIB AND NOT MZ_ZSTD AND NOT MZ_BZIP2 AND NOT MZ_LZMA)
+ message(STATUS "Compression not supported due to missing libraries")
+
+ list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_DECOMPRESSION)
+ list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_COMPRESSION)
+endif()
+
+# Check to see if openssl installation is present
+set(MZ_OPENSSL ${ENABLE_SSL})
+if(MZ_OPENSSL)
+ # Use openssl from ClickHouse contrib
+ list(APPEND MINIZIP_LIB OpenSSL::SSL OpenSSL::Crypto)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_crypt_openssl.c)
+endif()
+
+# Include WinZIP AES encryption
+set(MZ_WZAES ${ENABLE_SSL})
+if(MZ_WZAES)
+ list(APPEND MINIZIP_DEF -DHAVE_WZAES)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.h)
+endif()
+
+# Include traditional PKWare encryption
+set(MZ_PKCRYPT ON)
+if(MZ_PKCRYPT)
+ list(APPEND MINIZIP_DEF -DHAVE_PKCRYPT)
+
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.h)
+endif()
+
+# Unix specific
+if(UNIX)
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_os_posix.c
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_os_posix.c)
+endif()
+
+# Include compatibility layer
+set(MZ_COMPAT ON)
+if(MZ_COMPAT)
+ list(APPEND MINIZIP_SRC
+ ${_MINIZIP_SOURCE_DIR}/mz_compat.c)
+
+ list(APPEND MINIZIP_HDR
+ ${_MINIZIP_SOURCE_DIR}/mz_compat.h
+ zip.h
+ unzip.h)
+
+ list(APPEND MINIZIP_INC "${CMAKE_CURRENT_SOURCE_DIR}")
+ list(APPEND MINIZIP_PUBLIC_DEF "-DMZ_COMPAT_VERSION=110")
+endif()
+
+add_library(_minizip ${MINIZIP_SRC} ${MINIZIP_HDR})
+target_include_directories(_minizip PUBLIC ${MINIZIP_INC})
+target_compile_definitions(_minizip PUBLIC ${MINIZIP_PUBLIC_DEF})
+target_compile_definitions(_minizip PRIVATE ${MINIZIP_DEF})
+target_link_libraries(_minizip PRIVATE ${MINIZIP_LIB})
+
+add_library(ch_contrib::minizip ALIAS _minizip)
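Note (hypothetical usage sketch, not part of the patch): with MZ_COMPAT on, `ch_contrib::minizip` exposes the classic minizip `zip.h`/`unzip.h` API through the shim headers added in this directory. A minimal example of creating an archive through that compatibility layer; error handling is omitted, and the exact availability of the compat symbols depends on the build options above:

```cpp
#include <cstring>
#include <zlib.h>   /// Z_DEFLATED, Z_DEFAULT_COMPRESSION
#include <zip.h>    /// the compatibility shim from this directory

int main()
{
    zipFile zf = zipOpen("example.zip", APPEND_STATUS_CREATE);
    if (!zf)
        return 1;

    const char * data = "hello from minizip-ng";
    zipOpenNewFileInZip(zf, "hello.txt", nullptr, nullptr, 0, nullptr, 0, nullptr,
                        Z_DEFLATED, Z_DEFAULT_COMPRESSION);
    zipWriteInFileInZip(zf, data, static_cast<unsigned>(strlen(data)));
    zipCloseFileInZip(zf);
    zipClose(zf, nullptr);
}
```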
diff --git a/contrib/minizip-ng-cmake/unzip.h b/contrib/minizip-ng-cmake/unzip.h
new file mode 100644
index 00000000000..61cbd974e31
--- /dev/null
+++ b/contrib/minizip-ng-cmake/unzip.h
@@ -0,0 +1,13 @@
+/* unzip.h -- Compatibility layer shim
+ part of the minizip-ng project
+
+ This program is distributed under the terms of the same license as zlib.
+ See the accompanying LICENSE file for the full text of the license.
+*/
+
+#ifndef MZ_COMPAT_UNZIP
+#define MZ_COMPAT_UNZIP
+
+#include "mz_compat.h"
+
+#endif
diff --git a/contrib/minizip-ng-cmake/zip.h b/contrib/minizip-ng-cmake/zip.h
new file mode 100644
index 00000000000..cf38ac91a04
--- /dev/null
+++ b/contrib/minizip-ng-cmake/zip.h
@@ -0,0 +1,13 @@
+/* zip.h -- Compatibility layer shim
+ part of the minizip-ng project
+
+ This program is distributed under the terms of the same license as zlib.
+ See the accompanying LICENSE file for the full text of the license.
+*/
+
+#ifndef MZ_COMPAT_ZIP
+#define MZ_COMPAT_ZIP
+
+#include "mz_compat.h"
+
+#endif
diff --git a/contrib/orc b/contrib/orc
index 0a936f6bbdb..f9a393ed243 160000
--- a/contrib/orc
+++ b/contrib/orc
@@ -1 +1 @@
-Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1
+Subproject commit f9a393ed2433a60034795284f82d093b348f2102
diff --git a/contrib/replxx b/contrib/replxx
index f019cba7ea1..9460e5e0fc1 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit f019cba7ea1bcd1b4feb7826f28ed57fb581b04c
+Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d
diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt
index 902d29a9630..529d7f0c4e3 100644
--- a/contrib/rocksdb-cmake/CMakeLists.txt
+++ b/contrib/rocksdb-cmake/CMakeLists.txt
@@ -72,11 +72,6 @@ else()
if(WITH_ZSTD)
add_definitions(-DZSTD)
- include_directories(${ZSTD_INCLUDE_DIR})
- include_directories("${ZSTD_INCLUDE_DIR}/common")
- include_directories("${ZSTD_INCLUDE_DIR}/dictBuilder")
- include_directories("${ZSTD_INCLUDE_DIR}/deprecated")
-
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
endif()
@@ -132,11 +127,6 @@ endif()
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
- if(CMAKE_SYSTEM_PROCESSOR MATCHES arm)
- add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE)
- # no debug info for IOS, that will make our library big
- add_definitions(-DNDEBUG)
- endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")
diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service
index bc19235cb3a..a9400b24270 100644
--- a/debian/clickhouse-server.service
+++ b/debian/clickhouse-server.service
@@ -16,6 +16,8 @@ Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
+# The leading minus means that this file is optional.
+EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index 3e3cfc38218..00000000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-version: "2"
-
-services:
- builder:
- image: clickhouse/clickhouse-builder
- build: docker/builder
- client:
- image: clickhouse/clickhouse-client
- build: docker/client
- command: ['--host', 'server']
- server:
- image: clickhouse/clickhouse-server
- build: docker/server
- ports:
- - 8123:8123
diff --git a/docker/images.json b/docker/images.json
index 354bdaa8728..01284d4de69 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -32,6 +32,7 @@
"dependent": []
},
"docker/test/pvs": {
+ "only_amd64": true,
"name": "clickhouse/pvs-test",
"dependent": []
},
@@ -72,6 +73,7 @@
"dependent": []
},
"docker/test/integration/runner": {
+ "only_amd64": true,
"name": "clickhouse/integration-tests-runner",
"dependent": []
},
@@ -124,6 +126,7 @@
"dependent": []
},
"docker/test/integration/kerberos_kdc": {
+ "only_amd64": true,
"name": "clickhouse/kerberos-kdc",
"dependent": []
},
@@ -137,6 +140,7 @@
]
},
"docker/test/integration/kerberized_hadoop": {
+ "only_amd64": true,
"name": "clickhouse/kerberized-hadoop",
"dependent": []
},
diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 1ebaed752a6..e18c07bf2c1 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -185,15 +185,14 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
+gcore
backtrace full
-info locals
+thread apply all backtrace full
info registers
disassemble /s
up
-info locals
disassemble /s
up
-info locals
disassemble /s
p \"done\"
detach
@@ -314,6 +313,11 @@ quit
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi
+
+ if test -f core.*; then
+ pigz core.*
+ mv core.*.gz core.gz
+ fi
}
case "$stage" in
@@ -345,6 +349,10 @@ case "$stage" in
time fuzz
;&
"report")
+CORE_LINK=''
+if [ -f core.gz ]; then
+ CORE_LINK='core.gz'
+fi
cat > report.html <
@@ -386,6 +394,7 @@ th { cursor: pointer; }
fuzzer.logserver.logmain.log
+${CORE_LINK}
Test name
Test status
Description
diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile
index 025f4b27fde..592c3e36ef7 100644
--- a/docker/test/integration/kerberized_hadoop/Dockerfile
+++ b/docker/test/integration/kerberized_hadoop/Dockerfile
@@ -15,9 +15,10 @@ RUN curl -o krb5-libs-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.
rm -fr *.rpm
RUN cd /tmp && \
- curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
- tar xzf commons-daemon-1.0.15-src.tar.gz && \
- cd commons-daemon-1.0.15-src/src/native/unix && \
- ./configure && \
- make && \
- cp ./jsvc /usr/local/hadoop/sbin
+ curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
+ tar xzf commons-daemon-1.0.15-src.tar.gz && \
+ cd commons-daemon-1.0.15-src/src/native/unix && \
+ ./configure && \
+ make && \
+ cp ./jsvc /usr/local/hadoop-2.7.0/sbin && \
+ [ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop
diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 1aad2ae6770..22dd2e14456 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -58,9 +58,7 @@ RUN apt-get update \
RUN dockerd --version; docker --version
-ARG TARGETARCH
-# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
-RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
+RUN python3 -m pip install \
PyMySQL \
aerospike==4.0.0 \
avro==1.10.2 \
@@ -90,7 +88,7 @@ RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
urllib3 \
requests-kerberos \
pyhdfs \
- azure-storage-blob )
+ azure-storage-blob
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
index 88be3e45085..e1b4d393169 100644
--- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
@@ -4,7 +4,7 @@ services:
kerberizedhdfs1:
cap_add:
- DAC_READ_SEARCH
- image: clickhouse/kerberized-hadoop
+ image: clickhouse/kerberized-hadoop:${DOCKER_KERBERIZED_HADOOP_TAG:-latest}
hostname: kerberizedhdfs1
restart: always
volumes:
diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh
index 8109ef7ae64..34414abc3f5 100755
--- a/docker/test/integration/runner/dockerd-entrypoint.sh
+++ b/docker/test/integration/runner/dockerd-entrypoint.sh
@@ -45,6 +45,7 @@ export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
+export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index eddaf969f33..fb47ed0cefa 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -1,5 +1,5 @@
# docker build -t clickhouse/performance-comparison .
-FROM ubuntu:18.04
+FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile
index f484feecfd0..01cc7c97548 100644
--- a/docker/test/pvs/Dockerfile
+++ b/docker/test/pvs/Dockerfile
@@ -4,11 +4,7 @@
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
-# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere
-# We'll produce an empty image for arm64
-ARG TARGETARCH
-
-RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
+RUN apt-get update --yes \
&& apt-get install \
bash \
wget \
@@ -21,7 +17,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
libprotoc-dev \
libgrpc++-dev \
libc-ares-dev \
- --yes --no-install-recommends )
+ --yes --no-install-recommends
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
#RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list
@@ -33,7 +29,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"
-RUN test x$TARGETARCH = xarm64 || ( set -x \
+RUN set -x \
&& export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
@@ -41,7 +37,7 @@ RUN test x$TARGETARCH = xarm64 || ( set -x \
&& wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \
&& { debsig-verify ${PKG_VERSION}.deb \
|| echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \
- && dpkg -i "${PKG_VERSION}.deb" )
+ && dpkg -i "${PKG_VERSION}.deb"
ENV CCACHE_DIR=/test_output/ccache
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index d6d9f189e89..7c2563f2d86 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -12,7 +12,11 @@ dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
-dpkg -i package_folder/clickhouse-test_*.deb
+if [[ -n "$TEST_CASES_FROM_DEB" ]] && [[ "$TEST_CASES_FROM_DEB" -eq 1 ]]; then
+ dpkg -i package_folder/clickhouse-test_*.deb
+else
+ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
+fi
# install test configs
/usr/share/clickhouse-test/config/install.sh
@@ -85,6 +89,10 @@ function run_tests()
# everything in parallel except DatabaseReplicated. See below.
fi
+ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
+ ADDITIONAL_OPTIONS+=('--s3-storage')
+ fi
+
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--replicated-database')
ADDITIONAL_OPTIONS+=('--jobs')
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 4387d16ea7c..e57dbc38ded 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -148,14 +148,12 @@ info signals
continue
gcore
backtrace full
-info locals
+thread apply all backtrace full
info registers
disassemble /s
up
-info locals
disassemble /s
up
-info locals
disassemble /s
p \"done\"
detach
@@ -269,5 +267,5 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes
# Default filename is 'core.PROCESS_ID'
for core in core.*; do
pigz $core
- mv $core.gz /output/
+ mv $core.gz /test_output/
done
diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index a68b52170e0..85c751edfbe 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -11,6 +11,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
curl \
git \
libxml2-utils \
+ moreutils \
pylint \
python3-pip \
shellcheck \
diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py
index b7e00c49e06..655b7d70243 100755
--- a/docker/test/style/process_style_check_result.py
+++ b/docker/test/style/process_style_check_result.py
@@ -10,72 +10,26 @@ def process_result(result_folder):
status = "success"
description = ""
test_results = []
+ checks = (
+ ("header duplicates", "duplicate_output.txt"),
+ ("shellcheck", "shellcheck_output.txt"),
+ ("style", "style_output.txt"),
+ ("typos", "typos_output.txt"),
+ ("whitespaces", "whitespaces_output.txt"),
+ ("workflows", "workflows_output.txt"),
+ )
- duplicate_log_path = "{}/duplicate_output.txt".format(result_folder)
- if not os.path.exists(duplicate_log_path):
- logging.info("No header duplicates check log on path %s", duplicate_log_path)
- return "exception", "No header duplicates check log", []
- elif os.stat(duplicate_log_path).st_size != 0:
- description += " Header duplicates check failed. "
- test_results.append(("Header duplicates check", "FAIL"))
- status = "failure"
- else:
- test_results.append(("Header duplicates check", "OK"))
-
- shellcheck_log_path = "{}/shellcheck_output.txt".format(result_folder)
- if not os.path.exists(shellcheck_log_path):
- logging.info("No shellcheck log on path %s", shellcheck_log_path)
- return "exception", "No shellcheck log", []
- elif os.stat(shellcheck_log_path).st_size != 0:
- description += " Shellcheck check failed. "
- test_results.append(("Shellcheck ", "FAIL"))
- status = "failure"
- else:
- test_results.append(("Shellcheck", "OK"))
-
- style_log_path = "{}/style_output.txt".format(result_folder)
- if not os.path.exists(style_log_path):
- logging.info("No style check log on path %s", style_log_path)
- return "exception", "No style check log", []
- elif os.stat(style_log_path).st_size != 0:
- description += "Style check failed. "
- test_results.append(("Style check", "FAIL"))
- status = "failure"
- else:
- test_results.append(("Style check", "OK"))
-
- typos_log_path = "{}/typos_output.txt".format(result_folder)
- if not os.path.exists(typos_log_path):
- logging.info("No typos check log on path %s", typos_log_path)
- return "exception", "No typos check log", []
- elif os.stat(typos_log_path).st_size != 0:
- description += "Typos check failed. "
- test_results.append(("Typos check", "FAIL"))
- status = "failure"
- else:
- test_results.append(("Typos check", "OK"))
-
- whitespaces_log_path = "{}/whitespaces_output.txt".format(result_folder)
- if not os.path.exists(whitespaces_log_path):
- logging.info("No whitespaces check log on path %s", whitespaces_log_path)
- return "exception", "No whitespaces check log", []
- elif os.stat(whitespaces_log_path).st_size != 0:
- description += "Whitespaces check failed. "
- test_results.append(("Whitespaces check", "FAIL"))
- status = "failure"
- else:
- test_results.append(("Whitespaces check", "OK"))
-
- workflows_log_path = "{}/workflows_output.txt".format(result_folder)
- if not os.path.exists(workflows_log_path):
- logging.info("No workflows check log on path %s", style_log_path)
- return "exception", "No workflows check log", []
- elif os.stat(whitespaces_log_path).st_size != 0:
- description += "Workflows check failed. "
- test_results.append(("Workflows check", "FAIL"))
- status = "failure"
- else:
- test_results.append(("Workflows check", "OK"))
+ for name, out_file in checks:
+ full_path = os.path.join(result_folder, out_file)
+ if not os.path.exists(full_path):
+ logging.info("No %s check log on path %s", name, full_path)
+ return "exception", f"No {name} check log", []
+ elif os.stat(full_path).st_size != 0:
+ description += f"Check {name} failed. "
+ test_results.append((f"Check {name}", "FAIL"))
+ status = "failure"
+ else:
+ test_results.append((f"Check {name}", "OK"))
if not description:
description += "Style check success"
diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh
index 98bc0053ab9..ce3ea4e50a6 100755
--- a/docker/test/style/run.sh
+++ b/docker/test/style/run.sh
@@ -3,10 +3,16 @@
# yaml check is not the best one
cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
+echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
+echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
+echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
+echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
+echo "Check sorkflows" | ts
./check-workflows |& tee /test_output/workflows_output.txt
+echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
/process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile
index d15f237587b..fbff6fd5e97 100644
--- a/docker/test/testflows/runner/Dockerfile
+++ b/docker/test/testflows/runner/Dockerfile
@@ -43,24 +43,27 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 20.10.6
-RUN set -eux; \
- \
-# this "case" statement is generated via "update.sh"
- \
- if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \
- echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \
- exit 1; \
- fi; \
- \
- tar --extract \
+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
+# Install docker
+RUN arch=${TARGETARCH:-amd64} \
+ && case $arch in \
+ amd64) rarch=x86_64 ;; \
+ arm64) rarch=aarch64 ;; \
+ esac \
+ && set -eux \
+ && if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \
+ echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \
+ && exit 1; \
+ fi \
+ && tar --extract \
--file docker.tgz \
--strip-components 1 \
--directory /usr/local/bin/ \
- ; \
- rm docker.tgz; \
- \
- dockerd --version; \
- docker --version
+ && rm docker.tgz \
+ && dockerd --version \
+ && docker --version
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md
index f950cdcc6db..02019f13964 100644
--- a/docs/_includes/cmake_in_clickhouse_header.md
+++ b/docs/_includes/cmake_in_clickhouse_header.md
@@ -22,7 +22,7 @@ cmake .. \
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
-3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
+3. Libraries finders (search for contrib libraries, located in `/contrib/*/CMakeLists.txt`).
3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`)
## List of CMake flags
diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh
index 7dcca601d33..21106e9fc47 100644
--- a/docs/_includes/install/deb.sh
+++ b/docs/_includes/install/deb.sh
@@ -8,4 +8,4 @@ sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
-clickhouse-client
+clickhouse-client # or "clickhouse-client --password" if you set up a password.
diff --git a/docs/_includes/install/rpm.sh b/docs/_includes/install/rpm.sh
index de4a07420f7..e3fd1232047 100644
--- a/docs/_includes/install/rpm.sh
+++ b/docs/_includes/install/rpm.sh
@@ -4,4 +4,4 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.re
sudo yum install clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
-clickhouse-client
+clickhouse-client # or "clickhouse-client --password" if you set up a password.
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index ccf6da355b9..f7d7100d181 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -125,10 +125,6 @@ For installing CMake and Ninja on Mac OS X first install Homebrew and then insta
Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/.
-## Optional External Libraries {#optional-external-libraries}
-
-ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`.
-
## C++ Compiler {#c-compiler}
Compilers Clang starting from version 11 is supported for building ClickHouse.
diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
index 79ba06096b2..e1d571c909c 100644
--- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -97,13 +97,16 @@ Structure of the `patterns` section:
``` text
pattern
+ rule_type
regexp
function
pattern
+ rule_type
regexp
age + precision
...
pattern
+ rule_type
regexp
function
age + precision
@@ -127,12 +130,20 @@ When processing a row, ClickHouse checks the rules in the `pattern` sections. Ea
Fields for `pattern` and `default` sections:
-- `regexp`– A pattern for the metric name.
+- `rule_type` - a rule's type. It is applied only to metrics of the matching type. The engine uses it to separate plain and tagged metrics. Optional parameter. Default value: `all`.
+It is unnecessary when performance is not critical, or only one type of metrics is used, e.g. plain metrics. By default only one set of rules is created. If any of the special types is defined, two different sets are created: one for plain metrics (root.branch.leaf) and one for tagged metrics (root.branch.leaf;tag1=value1).
+The default rules end up in both sets.
+Valid values:
+ - `all` (default) - a universal rule, used when `rule_type` is omitted.
+ - `plain` - a rule for plain metrics. The field `regexp` is processed as a regular expression.
+ - `tagged` - a rule for tagged metrics (metrics are stored in the DB in the format `someName?tag1=value1&tag2=value2&tag3=value3`). The regular expression must be sorted by the tags' names, and the first tag must be `__name__` if it exists. The field `regexp` is processed as a regular expression.
+ - `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. Sorting by the tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
+- `regexp` – A pattern for the metric name (a regular expression or DSL).
- `age` – The minimum age of the data in seconds.
- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages.
-### Configuration Example {#configuration-example}
+### Configuration Example without rule types {#configuration-example}
``` xml
@@ -167,6 +178,81 @@ Fields for `pattern` and `default` sections:
```
+### Configuration Example with rule types {#configuration-typed-example}
+
+``` xml
+<graphite_rollup>
+    <version_column_name>Version</version_column_name>
+    <pattern>
+        <rule_type>plain</rule_type>
+        <regexp>click_cost</regexp>
+        <function>any</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tagged</rule_type>
+        <regexp>^((.*)|.)min\?</regexp>
+        <function>min</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tagged</rule_type>
+        <regexp></regexp>
+        <function>min</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tag_list</rule_type>
+        <regexp>someName;tag2=value2</regexp>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <default>
+        <function>max</function>
+        <retention>
+            <age>0</age>
+            <precision>60</precision>
+        </retention>
+        <retention>
+            <age>3600</age>
+            <precision>300</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>3600</precision>
+        </retention>
+    </default>
+</graphite_rollup>
+```
+
+
!!! warning "Warning"
Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 6769f48a466..92865c94475 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -886,3 +886,12 @@ S3 disk can be configured as `main` or `cold` storage:
```
In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule.
+
+## Virtual Columns {#virtual-columns}
+
+- `_part` — Name of a part.
+- `_part_index` — Sequential index of the part in the query result.
+- `_partition_id` — Name of a partition.
+- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
+- `_partition_value` — Values (a tuple) of a `partition by` expression.
+- `_sample_factor` — Sample factor (from the query).
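+
+For instance, the virtual columns can be selected alongside ordinary columns (a minimal sketch; the table name `visits` is hypothetical):
+
+``` sql
+SELECT
+    _part,
+    _partition_id,
+    count() AS rows
+FROM visits
+GROUP BY _part, _partition_id
+ORDER BY _part;
+```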
diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md
index 884774cbfae..d1f92d347a4 100644
--- a/docs/en/engines/table-engines/special/buffer.md
+++ b/docs/en/engines/table-engines/special/buffer.md
@@ -54,10 +54,8 @@ If the set of columns in the Buffer table does not match the set of columns in a
If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared.
The same thing happens if the subordinate table does not exist when the buffer is flushed.
-If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again.
-
!!! attention "Attention"
- Running ALTER on the Buffer table in releases made before 28 Sep 2020 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
+ Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
If the server is restarted abnormally, the data in the buffer is lost.
diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index faa1026b919..4d2454298f2 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -209,6 +209,8 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
+To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
+
## Virtual Columns {#virtual-columns}
- `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md).
diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index 04f035206b5..26d928085ce 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -7,18 +7,29 @@ toc_title: URL
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
-Syntax: `URL(URL, Format)`
+Syntax: `URL(URL [,Format] [,CompressionMethod])`
+
+- The `URL` parameter must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server that uses HTTP or HTTPS. This does not require any additional headers for getting a response from the server.
+
+- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
+
+- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.
+
+To enable compression, please first make sure the remote HTTP endpoint indicated by the `URL` parameter supports the corresponding compression algorithm. An example follows the list below.
+
+The supported `CompressionMethod` should be one of the following:
+- gzip or gz
+- deflate
+- brotli or br
+- lzma or xz
+- zstd or zst
+- lz4
+- bz2
+- snappy
+- none
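+
+For example, a table reading gzip-compressed CSV over HTTPS could be declared as follows (a sketch; the URL and column set are hypothetical):
+
+``` sql
+CREATE TABLE url_gzip_demo (id UInt32, value String)
+ENGINE = URL('https://example.com/data/demo.csv.gz', 'CSV', 'gzip');
+
+SELECT * FROM url_gzip_demo LIMIT 10;
+```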
## Usage {#using-the-engine-in-the-clickhouse-server}
-The `format` must be one that ClickHouse can use in
-`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
-[Formats](../../../interfaces/formats.md#formats).
-
-The `URL` must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server
-that uses HTTP or HTTPS. This does not require any
-additional headers for getting a response from the server.
-
`INSERT` and `SELECT` queries are transformed to `POST` and `GET` requests,
respectively. For processing `POST` requests, the remote server must support
[Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding).
diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md
index d845b8c5898..891e1ea464e 100644
--- a/docs/en/faq/index.md
+++ b/docs/en/faq/index.md
@@ -25,6 +25,7 @@ Categories:
- **[Operations](../faq/operations/index.md)**
- [Which ClickHouse version to use in production?](../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md)
+ - [Does ClickHouse support multi-region replication?](../faq/operations/multi-region-replication.md)
- **[Integration](../faq/integration/index.md)**
- [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md)
diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index 70a1b8349ff..c03daf45b02 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -69,14 +69,14 @@ You can also download and install packages manually from [here](https://repo.cli
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/.
-After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version:
+After that, the downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version:
``` bash
-export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
-curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
-curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
-curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
-curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
+export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
+curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
+curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
+curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
+curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index f8f6f26d208..d72fb4d6f17 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -23,11 +23,13 @@ Web UI can be accessed here: `http://localhost:8123/play`.
![Web UI](../images/play.png)
-In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13.
+In health-check scripts use the `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check the replica's delay.
``` bash
$ curl 'http://localhost:8123/ping'
Ok.
+$ curl 'http://localhost:8123/replicas_status'
+Ok.
```
Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries.
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 5efa1b971bc..9c7fab7424d 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -67,6 +67,7 @@ toc_title: Adopters
| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
| Gigapipe | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
+| Gigasheet | Analytics | Main product | — | — | Direct Reference, February 2022 |
| Glaber | Monitoring | Main product | — | — | [Website](https://glaber.io/) |
| GraphCDN | CDN | Traffic Analytics | — | — | [Blog Post in English, August 2021](https://altinity.com/blog/delivering-insight-on-graphql-apis-with-clickhouse-at-graphcdn/) |
| Grouparoo | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) |
diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md
index fcfc675f9d7..35ec5d858f5 100644
--- a/docs/en/operations/clickhouse-keeper.md
+++ b/docs/en/operations/clickhouse-keeper.md
@@ -108,7 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati
ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `<keeper_server>` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with:
```bash
-clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
+clickhouse-keeper --config /etc/your_path_to_config/config.xml
+```
+
+If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as an argument:
+
+```bash
+clickhouse keeper --config /etc/your_path_to_config/config.xml
```
## Four Letter Word Commands {#four-letter-word-commands}
diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md
index 6dac8736372..ec27ecfd6b2 100644
--- a/docs/en/operations/opentelemetry.md
+++ b/docs/en/operations/opentelemetry.md
@@ -14,7 +14,7 @@ toc_title: OpenTelemetry Support
ClickHouse accepts trace context HTTP headers, as described by the [W3C recommendation](https://www.w3.org/TR/trace-context/). It also accepts trace context over a native protocol that is used for communication between ClickHouse servers or between the client and server. For manual testing, trace context headers conforming to the Trace Context recommendation can be supplied to `clickhouse-client` using `--opentelemetry-traceparent` and `--opentelemetry-tracestate` flags.
-If no parent trace context is supplied, ClickHouse can start a new trace, with probability controlled by the [opentelemetry_start_trace_probability](../operations/settings/settings.md#opentelemetry-start-trace-probability) setting.
+If no parent trace context is supplied or the provided trace context does not comply with the W3C standard above, ClickHouse can start a new trace, with probability controlled by the [opentelemetry_start_trace_probability](../operations/settings/settings.md#opentelemetry-start-trace-probability) setting.
## Propagating the Trace Context
@@ -46,8 +46,8 @@ ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow')
SETTINGS output_format_json_named_tuples_as_objects = 1,
output_format_json_array_of_rows = 1 AS
SELECT
- lower(hex(reinterpretAsFixedString(trace_id))) AS traceId,
- lower(hex(parent_span_id)) AS parentId,
+ lower(hex(trace_id)) AS traceId,
+ case when parent_span_id = 0 then '' else lower(hex(parent_span_id)) end AS parentId,
lower(hex(span_id)) AS id,
operation_name AS name,
start_time_us AS timestamp,
diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
index 9244592d515..72cfa59b8b2 100644
--- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md
+++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
@@ -27,7 +27,7 @@ To analyze the `trace_log` system table:
For security reasons, introspection functions are disabled by default.
-- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
+- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8a0fd618d32..56f1e7fe3bb 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1803,6 +1803,48 @@ If an INSERTed block is skipped due to deduplication in the source table, there
At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself,
ignoring check result for the source table, and will insert rows lost because of the first failure.
+## insert_deduplication_token {#insert_deduplication_token}
+
+The setting allows a user to provide their own deduplication semantics in MergeTree/ReplicatedMergeTree.
+For example, by providing a unique value for the setting in each INSERT statement,
+the user can avoid the same inserted data being deduplicated.
+
+Possible values:
+
+- Any string
+
+Default value: empty string (disabled)
+
+`insert_deduplication_token` is used for deduplication _only_ when not empty.
+
+Example:
+
+```sql
+CREATE TABLE test_table
+( A Int64 )
+ENGINE = MergeTree
+ORDER BY A
+SETTINGS non_replicated_deduplication_window = 100;
+
+INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test' (1);
+
+-- the next insert won't be deduplicated because insert_deduplication_token is different
+INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test1' (1);
+
+-- the next insert will be deduplicated because insert_deduplication_token
+-- is the same as one of the previous
+INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test' (2);
+
+SELECT * FROM test_table
+
+┌─A─┐
+│ 1 │
+└───┘
+┌─A─┐
+│ 1 │
+└───┘
+```
+
## max_network_bytes {#settings-max-network-bytes}
Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
@@ -2304,7 +2346,7 @@ Possible values:
- 1 — Enabled.
- 0 — Disabled.
-Default value: `0`.
+Default value: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
@@ -2315,7 +2357,7 @@ Possible values:
- 1 — Enabled.
- 0 — Disabled.
-Default value: `0`.
+Default value: `1`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}
diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md
index eb1824a6f66..e2135e4beb6 100644
--- a/docs/en/operations/system-tables/stack_trace.md
+++ b/docs/en/operations/system-tables/stack_trace.md
@@ -2,7 +2,7 @@
Contains stack traces of all server threads. Allows developers to introspect the server state.
-To analyze stack frames, use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md).
+To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md).
Columns:
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 4902b09004d..ab08ef7415c 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -4,7 +4,7 @@ Contains stack traces collected by the sampling query profiler.
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
-To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
+To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions.
Columns:
diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
index fd391298bc3..7d8d255e15b 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
@@ -10,7 +10,7 @@ Applies Student's t-test to samples from two populations.
**Syntax**
``` sql
-studentTTest(sample_data, sample_index)
+studentTTest([confidence_level])(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
@@ -21,12 +21,19 @@ The null hypothesis is that means of populations are equal. Normal distribution
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+**Parameters**
+
+- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md).
+
+
**Returned values**
-[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+[Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified):
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md).
+- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md).
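+
+A parametric call might look like this (a sketch; the table name is hypothetical). With `confidence_level` given, the four-element tuple described above is returned:
+
+``` sql
+SELECT studentTTest(0.95)(sample_data, sample_index) FROM student_ttest_samples;
+```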
**Example**
diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
index 62f5761b32e..2e127f87f9f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
@@ -10,7 +10,7 @@ Applies Welch's t-test to samples from two populations.
**Syntax**
``` sql
-welchTTest(sample_data, sample_index)
+welchTTest([confidence_level])(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
@@ -21,12 +21,18 @@ The null hypothesis is that means of populations are equal. Normal distribution
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+**Parameters**
+
+- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md).
+
**Returned values**
-[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+[Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified):
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md).
+- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md).
**Example**
diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md
index 588b5a2d7d6..4cc590d9fa5 100644
--- a/docs/en/sql-reference/data-types/int-uint.md
+++ b/docs/en/sql-reference/data-types/int-uint.md
@@ -1,9 +1,9 @@
---
toc_priority: 40
-toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
+toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---
-# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256}
+# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
Fixed-length integers, with or without a sign.
diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md
index 29de9ee4b70..42307093dda 100644
--- a/docs/en/sql-reference/functions/functions-for-nulls.md
+++ b/docs/en/sql-reference/functions/functions-for-nulls.md
@@ -120,7 +120,7 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32
Get the first available contact method for the customer from the contact list:
``` sql
-SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
+SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
```
``` text
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md
index 21b570c65d4..1be68c6bdd4 100644
--- a/docs/en/sql-reference/functions/introspection.md
+++ b/docs/en/sql-reference/functions/introspection.md
@@ -113,6 +113,111 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
```
+## addressToLineWithInlines {#addresstolinewithinlines}
+
+Similar to `addressToLine`, but returns an Array with all inline functions. As a price, it is much slower.
+
+If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
+
+**Syntax**
+
+``` sql
+addressToLineWithInlines(address_of_binary_instruction)
+```
+
+**Arguments**
+
+- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
+
+**Returned value**
+
+- An array whose first element is the source code filename and the line number in this file, delimited by a colon. Starting from the second element, the inline functions' source code filenames, line numbers and function names are listed.
+
+- An array with a single element that is the name of the binary, if the function couldn’t find the debug information.
+
+- An empty array, if the address is not valid.
+
+Type: [Array(String)](../../sql-reference/data-types/array.md).
+
+**Example**
+
+Enabling introspection functions:
+
+``` sql
+SET allow_introspection_functions=1;
+```
+
+Applying the function to an address:
+
+```sql
+SELECT addressToLineWithInlines(531055181::UInt64);
+```
+
+``` text
+┌─addressToLineWithInlines(CAST('531055181', 'UInt64'))────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
+└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Applying the function to the whole stack trace:
+
+``` sql
+SELECT
+ ta, addressToLineWithInlines(arrayJoin(trace) as ta)
+FROM system.trace_log
+WHERE
+ query_id = '5e173544-2020-45de-b645-5deebe2aae54';
+```
+
+The [arrayJoin](../../sql-reference/functions/array-functions.md#array-functions-join) function splits the array into rows.
+
+``` text
+┌────────ta─┬─addressToLineWithInlines(arrayJoin(trace))───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ 365497529 │ ['./build_normal_debug/./contrib/libcxx/include/string_view:252'] │
+│ 365593602 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:191'] │
+│ 365593866 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365592528 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365591003 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:477'] │
+│ 365590479 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:442'] │
+│ 365590600 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:457'] │
+│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
+│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
+│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
+│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
+│ 365597289 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:807'] │
+│ 365599840 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:1118'] │
+│ 531058145 │ ['./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:152'] │
+│ 531055181 │ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
+│ 422333613 │ ['./build_normal_debug/./src/Functions/IFunctionAdaptors.h:21'] │
+│ 586866022 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:216'] │
+│ 586869053 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:264'] │
+│ 586873237 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:334'] │
+│ 597901620 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:601'] │
+│ 597898534 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:718'] │
+│ 630442912 │ ['./build_normal_debug/./src/Processors/Transforms/ExpressionTransform.cpp:23'] │
+│ 546354050 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.h:38'] │
+│ 626026993 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.cpp:89'] │
+│ 626294022 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:45'] │
+│ 626293730 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:63'] │
+│ 626169525 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:213'] │
+│ 626170308 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:178'] │
+│ 626166348 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:329'] │
+│ 626163461 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:84'] │
+│ 626323536 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:85'] │
+│ 626323277 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:112'] │
+│ 626323133 │ ['./build_normal_debug/./contrib/libcxx/include/type_traits:3682'] │
+│ 626323041 │ ['./build_normal_debug/./contrib/libcxx/include/tuple:1415'] │
+└───────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+
+```
+
+
## addressToSymbol {#addresstosymbol}
Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files.
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index 8502fcdcf66..96bceb8958c 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -22,7 +22,7 @@ tuple(x, y, …)
## tupleElement {#tupleelement}
A function that allows getting a column from a tuple.
-‘N’ is the column index, starting from 1. N must be a constant. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple.
+‘N’ is the column index, starting from 1. ‘N’ must be a constant, strictly positive integer no greater than the size of the tuple.
There is no cost to execute the function.
The function implements the operator `x.N`.
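+
+A minimal illustration (indexing starts from 1):
+
+``` sql
+SELECT tupleElement((1, 'a'), 2); -- returns 'a'
+```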
diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md
index 3d8d2673468..d8468370f3e 100644
--- a/docs/en/sql-reference/operators/in.md
+++ b/docs/en/sql-reference/operators/in.md
@@ -216,6 +216,17 @@ This is more optimal than using the normal IN. However, keep the following point
It also makes sense to specify a local table in the `GLOBAL IN` clause, in case this local table is only available on the requestor server and you want to use data from it on remote servers.
+### Distributed Subqueries and max_rows_in_set
+
+You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.
+
+This is especially important if the `global in` query returns a large amount of data. Consider the following SQL:
+```sql
+select * from table1 where col1 global in (select col1 from table2 where some_predicate)
+```
+
+If `some_predicate` is not selective enough, it will return a large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default), meaning that an exception is raised when these thresholds are met.
+
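+A sketch of limiting the transferred set (the threshold values below are arbitrary placeholders, not recommendations):
+
+``` sql
+SET max_rows_in_set = 10000000, max_bytes_in_set = 1000000000, set_overflow_mode = 'throw';
+
+SELECT * FROM table1 WHERE col1 GLOBAL IN (SELECT col1 FROM table2 WHERE some_predicate);
+```
+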
### Distributed Subqueries and max_parallel_replicas {#max_parallel_replica-subqueries}
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:
diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index 2e562e20467..6bb63ea06a6 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
## MATERIALIZE COLUMN {#materialize-column}
-Materializes the column in the parts where the column is missing. This is useful in case of creating a new column with complicated `DEFAULT` or `MATERIALIZED` expression. Calculation of the column directly on `SELECT` query can cause bigger request execution time, so it is reasonable to use `MATERIALIZE COLUMN` for such columns. To perform same manipulation for existing column, use `FINAL` modifier.
+Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
+It is used if it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` execution turns out to be expensive.
Syntax:
```sql
-ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
+ALTER TABLE table MATERIALIZE COLUMN col;
```
**Example**
@@ -211,20 +212,34 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
DROP TABLE IF EXISTS tmp;
SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
-INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
+INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
+SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x);
+
+┌─groupArray(x)─┬─groupArray(s)─────────┐
+│ [0,1,2,3,4] │ ['0','1','2','3','4'] │
+└───────────────┴───────────────────────┘
+
+ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x));
+
+INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5;
+
SELECT groupArray(x), groupArray(s) FROM tmp;
-```
-**Result:**
+┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐
+│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │
+└───────────────────────┴────────────────────────────────────────────────┘
-```sql
-┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐
-│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │
-└───────────────────────┴───────────────────────────────────────────┘
+ALTER TABLE tmp MATERIALIZE COLUMN s;
+
+SELECT groupArray(x), groupArray(s) FROM tmp;
+
+┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐
+│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │
+└───────────────────────┴───────────────────────────────────────────────────────┘
```
**See Also**
diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md
index 2b1262f7d3c..1b2b63ba0e7 100644
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@@ -172,6 +172,7 @@ Hierarchy of privileges:
- `SYSTEM FLUSH LOGS`
- [INTROSPECTION](#grant-introspection)
- `addressToLine`
+ - `addressToLineWithInlines`
- `addressToSymbol`
- `demangle`
- [SOURCES](#grant-sources)
@@ -430,6 +431,7 @@ Allows using [introspection](../../operations/optimizing-performance/sampling-qu
- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS`
- `addressToLine`. Level: `GLOBAL`
+ - `addressToLineWithInlines`. Level: `GLOBAL`
- `addressToSymbol`. Level: `GLOBAL`
- `demangle`. Level: `GLOBAL`
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md
index ee6893812cc..b24f0213e4e 100644
--- a/docs/en/sql-reference/statements/select/order-by.md
+++ b/docs/en/sql-reference/statements/select/order-by.md
@@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_
`WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings.
When `FROM const_expr` not defined sequence of filling use minimal `expr` field value from `ORDER BY`.
When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
-When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type and as `seconds` for DateTime type.
+When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
Example of a query without `WITH FILL`:
@@ -402,4 +402,85 @@ Result:
└────────────┴────────────┴──────────┘
```
+The following query uses an `INTERVAL` of 1 day as the fill step for column `d1`:
+
+``` sql
+SELECT
+ toDate((number * 10) * 86400) AS d1,
+ toDate(number * 86400) AS d2,
+ 'original' AS source
+FROM numbers(10)
+WHERE (number % 3) = 1
+ORDER BY
+ d1 WITH FILL STEP INTERVAL 1 DAY,
+ d2 WITH FILL;
+```
+
+Result:
+```
+┌─────────d1─┬─────────d2─┬─source───┐
+│ 1970-01-11 │ 1970-01-02 │ original │
+│ 1970-01-12 │ 1970-01-01 │ │
+│ 1970-01-13 │ 1970-01-01 │ │
+│ 1970-01-14 │ 1970-01-01 │ │
+│ 1970-01-15 │ 1970-01-01 │ │
+│ 1970-01-16 │ 1970-01-01 │ │
+│ 1970-01-17 │ 1970-01-01 │ │
+│ 1970-01-18 │ 1970-01-01 │ │
+│ 1970-01-19 │ 1970-01-01 │ │
+│ 1970-01-20 │ 1970-01-01 │ │
+│ 1970-01-21 │ 1970-01-01 │ │
+│ 1970-01-22 │ 1970-01-01 │ │
+│ 1970-01-23 │ 1970-01-01 │ │
+│ 1970-01-24 │ 1970-01-01 │ │
+│ 1970-01-25 │ 1970-01-01 │ │
+│ 1970-01-26 │ 1970-01-01 │ │
+│ 1970-01-27 │ 1970-01-01 │ │
+│ 1970-01-28 │ 1970-01-01 │ │
+│ 1970-01-29 │ 1970-01-01 │ │
+│ 1970-01-30 │ 1970-01-01 │ │
+│ 1970-01-31 │ 1970-01-01 │ │
+│ 1970-02-01 │ 1970-01-01 │ │
+│ 1970-02-02 │ 1970-01-01 │ │
+│ 1970-02-03 │ 1970-01-01 │ │
+│ 1970-02-04 │ 1970-01-01 │ │
+│ 1970-02-05 │ 1970-01-01 │ │
+│ 1970-02-06 │ 1970-01-01 │ │
+│ 1970-02-07 │ 1970-01-01 │ │
+│ 1970-02-08 │ 1970-01-01 │ │
+│ 1970-02-09 │ 1970-01-01 │ │
+│ 1970-02-10 │ 1970-01-05 │ original │
+│ 1970-02-11 │ 1970-01-01 │ │
+│ 1970-02-12 │ 1970-01-01 │ │
+│ 1970-02-13 │ 1970-01-01 │ │
+│ 1970-02-14 │ 1970-01-01 │ │
+│ 1970-02-15 │ 1970-01-01 │ │
+│ 1970-02-16 │ 1970-01-01 │ │
+│ 1970-02-17 │ 1970-01-01 │ │
+│ 1970-02-18 │ 1970-01-01 │ │
+│ 1970-02-19 │ 1970-01-01 │ │
+│ 1970-02-20 │ 1970-01-01 │ │
+│ 1970-02-21 │ 1970-01-01 │ │
+│ 1970-02-22 │ 1970-01-01 │ │
+│ 1970-02-23 │ 1970-01-01 │ │
+│ 1970-02-24 │ 1970-01-01 │ │
+│ 1970-02-25 │ 1970-01-01 │ │
+│ 1970-02-26 │ 1970-01-01 │ │
+│ 1970-02-27 │ 1970-01-01 │ │
+│ 1970-02-28 │ 1970-01-01 │ │
+│ 1970-03-01 │ 1970-01-01 │ │
+│ 1970-03-02 │ 1970-01-01 │ │
+│ 1970-03-03 │ 1970-01-01 │ │
+│ 1970-03-04 │ 1970-01-01 │ │
+│ 1970-03-05 │ 1970-01-01 │ │
+│ 1970-03-06 │ 1970-01-01 │ │
+│ 1970-03-07 │ 1970-01-01 │ │
+│ 1970-03-08 │ 1970-01-01 │ │
+│ 1970-03-09 │ 1970-01-01 │ │
+│ 1970-03-10 │ 1970-01-01 │ │
+│ 1970-03-11 │ 1970-01-01 │ │
+│ 1970-03-12 │ 1970-01-08 │ original │
+└────────────┴────────────┴──────────┘
+```
+
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/)
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 23d57c22586..b71853f29dd 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -72,7 +72,7 @@ Reloads all [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-mo
**Syntax**
```sql
-SYSTEM RELOAD MODELS
+SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
```
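+
+For example, to reload all models on every host of a cluster, you might run the following (the cluster name `test_cluster` is a placeholder):
+
+```sql
+SYSTEM RELOAD MODELS ON CLUSTER test_cluster
+```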
## RELOAD MODEL {#query_language-system-reload-model}
@@ -82,7 +82,7 @@ Completely reloads a CatBoost model `model_name` if the configuration was update
**Syntax**
```sql
-SYSTEM RELOAD MODEL
+SYSTEM RELOAD MODEL [ON CLUSTER cluster_name]
```
## RELOAD FUNCTIONS {#query_language-system-reload-functions}
@@ -92,8 +92,8 @@ Reloads all registered [executable user defined functions](../functions/index.md
**Syntax**
```sql
-RELOAD FUNCTIONS
-RELOAD FUNCTION function_name
+RELOAD FUNCTIONS [ON CLUSTER cluster_name]
+RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
```
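+
+For example, assuming a user defined function named `my_function` and a cluster named `test_cluster` (both placeholders):
+
+```sql
+RELOAD FUNCTIONS ON CLUSTER test_cluster
+RELOAD FUNCTION ON CLUSTER test_cluster my_function
+```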
## DROP DNS CACHE {#query_language-system-drop-dns-cache}
diff --git a/docs/en/sql-reference/statements/use.md b/docs/en/sql-reference/statements/use.md
index 41cba58bb9d..841c23d333d 100644
--- a/docs/en/sql-reference/statements/use.md
+++ b/docs/en/sql-reference/statements/use.md
@@ -3,14 +3,14 @@ toc_priority: 53
toc_title: USE
---
-# USE 语句 {#use}
+# USE Statement {#use}
``` sql
USE db
```
-用于设置会话的当前数据库。
+Lets you set the current database for the session.
-如果查询语句中没有在表名前面以加点的方式指明数据库名, 则用当前数据库进行搜索。
+The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name.
-使用 HTTP 协议时无法进行此查询,因为没有会话的概念。
+This query can’t be made when using the HTTP protocol, since there is no concept of a session.
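+
+For example, assuming a database `db_1` that contains a table `my_table` (both names are placeholders), the following two queries read from the same table:
+
+``` sql
+USE db_1;
+SELECT count() FROM my_table;
+
+SELECT count() FROM db_1.my_table;
+```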
diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md
index 207b2b82cd2..19efef3dc6a 100644
--- a/docs/en/sql-reference/syntax.md
+++ b/docs/en/sql-reference/syntax.md
@@ -30,7 +30,7 @@ There may be any number of space symbols between syntactical constructions (incl
ClickHouse supports either SQL-style and C-style comments:
-- SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
+- SQL-style comments start with `--`, `#!` or `# ` and continue to the end of the line; a space after `--` and `#!` can be omitted (see the example after this list).
- C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
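+
+For example, the following (hypothetical) script combines all of the supported comment styles:
+
+``` sql
+SELECT 1; -- a SQL-style comment
+#!another single-line comment
+# a single-line comment with a space after the hash
+/* a C-style comment
+   that spans multiple lines */
+SELECT 2;
+```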
## Keywords {#syntax-keywords}
@@ -106,9 +106,9 @@ In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/
### Heredoc {#heredeoc}
-A [heredoc](https://en.wikipedia.org/wiki/Here_document) is a way to define a string (often multiline), while maintaining the original formatting. A heredoc is defined as a custom string literal, placed between two `$` symbols, for example `$heredoc$`. A value between two heredocs is processed "as-is".
+A [heredoc](https://en.wikipedia.org/wiki/Here_document) is a way to define a string (often multiline), while maintaining the original formatting. A heredoc is defined as a custom string literal, placed between two `$` symbols, for example `$heredoc$`. A value between two heredocs is processed "as-is".
-You can use a heredoc to embed snippets of SQL, HTML, or XML code, etc.
+You can use a heredoc to embed snippets of SQL, HTML, or XML code, etc.
**Example**
diff --git a/docs/en/whats-new/roadmap.md b/docs/en/whats-new/roadmap.md
index 8872c42818f..54f8f9d68a3 100644
--- a/docs/en/whats-new/roadmap.md
+++ b/docs/en/whats-new/roadmap.md
@@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
-The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623).
+The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##}
diff --git a/docs/ko/images/column-oriented.gif b/docs/ko/images/column-oriented.gif
new file mode 100644
index 00000000000..d5ac7c82848
Binary files /dev/null and b/docs/ko/images/column-oriented.gif differ
diff --git a/docs/ko/images/logo.svg b/docs/ko/images/logo.svg
new file mode 100644
index 00000000000..b5ab923ff65
--- /dev/null
+++ b/docs/ko/images/logo.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/ko/images/play.png b/docs/ko/images/play.png
new file mode 100644
index 00000000000..b75aebe4089
Binary files /dev/null and b/docs/ko/images/play.png differ
diff --git a/docs/ko/images/row-oriented.gif b/docs/ko/images/row-oriented.gif
new file mode 100644
index 00000000000..41395b5693e
Binary files /dev/null and b/docs/ko/images/row-oriented.gif differ
diff --git a/docs/ko/index.md b/docs/ko/index.md
new file mode 100644
index 00000000000..f2a6396c069
--- /dev/null
+++ b/docs/ko/index.md
@@ -0,0 +1,94 @@
+---
+toc_priority: 0
+toc_title: 목차
+---
+
+# ClickHouse란? {#what-is-clickhouse}
+
+ClickHouse® 는 query의 온라인 분석 처리(OLAP)를 위한 열 지향(column-oriented) 데이터베이스 관리 시스템(DBMS)입니다.
+
+"보통의" 행 지향(row-oriented) DMBS에서는 데이터가 다음과 같은 순서로 저장됩니다.
+
+| row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
+|-----|-------------|------------|--------------------|-----------|---------------------|
+| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
+| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
+| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
+| #N | … | … | … | … | … |
+
+즉, 행과 관련된 모든 값들은 물리적으로 나란히 저장됩니다.
+
+행 지향(row-oriented) DBMS의 예시로는 MySQL, Postgres, 그리고 MS SQL 서버 등이 있습니다.
+
+열 지향 (column-oriented) DBMS에서는 데이터가 아래와 같은 방식으로 저장됩니다:
+
+| Row: | #0 | #1 | #2 | #N |
+|-------------|---------------------|---------------------|---------------------|-----|
+| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
+| JavaEnable: | 1 | 0 | 1 | … |
+| Title: | Investor Relations | Contact us | Mission | … |
+| GoodEvent: | 1 | 1 | 1 | … |
+| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
+
+이 예에서는 데이터가 정렬된 순서만을 보여줍니다. 다른 열의 값들은 서로 분리되어 저장되고, 같은 열의 정보들은 함께 저장됩니다.
+
+열 지향(column-oriented) DBMS 의 종류는 Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, 그리고 kdb+ 등이 있습니다.
+
+데이터를 저장하기 위한 서로 다른 순서는 다른 시나리오에 더 적합합니다. 데이터 접근 시나리오는 쿼리가 수행되는 빈도, 비율 및 비율을 나타내거나, 각 쿼리 유형(행, 열 및 바이트)에 대해 읽은 데이터의 양 데이터 읽기와 업데이트 사이의 관계, 데이터의 작업 크기 및 로컬에서 사용되는 방법 트랜잭션이 사용되는지 여부, 트랜잭션이 얼마나 격리되어 있는지, 데이터 복제 및 논리적 무결성에 대한 요구 사항, 각 쿼리 유형에 대한 대기 시간 및 처리량 요구 사항 등이 있습니다.
+
+시스템의 부하가 높을수록 사용 시나리오의 요구 사항에 맞게 시스템 설정을 사용자 지정하는 것이 더 중요하며 이 사용자 지정은 더욱 세분화됩니다. 상당히 다른 시나리오에 똑같이 적합한 시스템은 없습니다. 만약 높은 부하에서 시스템이 넓은 시나리오 집합에 대해 적응한다면 시스템은 모든 시나리오를 모두 제대로 처리하지 못하거나 가능한 시나리오 중 하나 또는 몇 개에 대해서만 잘 작동할 것입니다.
+
+## OLAP 시나리오의 중요 속성들 {#key-properties-of-olap-scenario}
+
+- 요청(request)의 대부분은 읽기 접근에 관한 것입니다.
+- 데이터는 단일 행이 아니라 상당히 큰 일괄 처리(\> 1000개 행)로 업데이트됩니다. 또는 전혀 업데이트되지 않습니다.
+- 데이터는 DB에 추가되지만 수정되지는 않습니다.
+- 읽기의 경우 DB에서 상당히 많은 수의 행이 추출되지만 열은 일부만 추출됩니다.
+- 테이블은 "넓습니다". 이는 열의 수가 많다는 것을 의미합니다.
+- 쿼리는 상대적으로 드뭅니다(일반적으로 서버당 수백 또는 초당 쿼리 미만).
+- 간단한 쿼리의 경우 약 50ms의 대기 시간이 허용됩니다.
+- 열 값은 숫자와 짧은 문자열(예: URL당 60바이트)과 같이 상당히 작습니다
+- 단일 쿼리를 처리할 때 높은 처리량이 필요합니다(서버당 초당 최대 수십억 행).
+- 트랜잭션이 필요하지 않습니다.
+- 데이터 일관성에 대한 요구 사항이 낮습니다.
+- 쿼리당 하나의 큰 테이블이 존재하고 하나를 제외한 모든 테이블은 작습니다.
+- 쿼리 결과가 원본 데이터보다 훨씬 작습니다. 즉, 데이터가 필터링되거나 집계되므로 결과가 단일 서버의 RAM에 꼭 들어맞습니다.
+
+OLAP 시나리오가 다른 일반적인 시나리오(OLTP 또는 키-값 액세스와 같은)와 매우 다르다는 것을 쉽게 알 수 있습니다. 따라서 적절한 성능을 얻으려면 분석 쿼리를 처리하기 위해 OLTP 또는 키-값 DB를 사용하는 것은 의미가 없습니다. 예를 들어 분석에 MongoDB나 Redis를 사용하려고 하면 OLAP 데이터베이스에 비해 성능이 매우 저하됩니다.
+
+## 왜 열 지향 데이터베이스가 OLAP 시나리오에 적합한가{#why-column-oriented-databases-work-better-in-the-olap-scenario}
+
+열 지향(column-oriented) 데이터베이스는 OLAP 시나리오에 더 적합합니다. 대부분의 쿼리를 처리하는 데 있어서 행 지향(row-oriented) 데이터베이스보다 100배 이상 빠릅니다. 그 이유는 아래에 자세히 설명되어 있지만 사실은 시각적으로 더 쉽게 설명할 수 있습니다.
+
+**행 지향 DBMS**
+
+![Row-oriented](images/row-oriented.gif#)
+
+**열 지향 DBMS**
+
+![Column-oriented](images/column-oriented.gif#)
+
+차이가 보이시나요?
+
+### 입출력 {#inputoutput}
+
+1. 분석 쿼리의 경우 적은 수의 테이블 열만 읽어야 합니다. 열 지향 데이터베이스에서는 필요한 데이터만 읽을 수 있습니다. 예를 들어 100개 중 5개의 열이 필요한 경우 I/O가 20배 감소할 것으로 예상할 수 있습니다.
+2. 데이터는 패킷으로 읽히므로 압축하기가 더 쉽습니다. 열의 데이터도 압축하기 쉽습니다. 이것은 I/O의 볼륨을 더욱 감소시킵니다.
+3. 감소된 I/O로 인해 시스템 캐시에 더 많은 데이터가 들어갑니다.
+
+예를 들어, "각 광고 플랫폼에 대한 레코드 수 계산" 쿼리는 압축되지 않은 1바이트를 차지하는 하나의 "광고 플랫폼 ID" 열을 읽어야 합니다. 트래픽의 대부분이 광고 플랫폼에서 발생하지 않은 경우 이 열의 최소 10배 압축을 기대할 수 있습니다. 빠른 압축 알고리즘을 사용하면 초당 최소 몇 기가바이트의 압축되지 않은 데이터의 속도로 데이터 압축 해제가 가능합니다. 즉, 이 쿼리는 단일 서버에서 초당 약 수십억 행의 속도로 처리될 수 있습니다. 이 속도는 정말 실제로 달성됩니다.
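+
+For illustration, such a query might look like the following sketch (the table and column names are made up):
+
+``` sql
+SELECT CounterID, count() AS records
+FROM hits
+GROUP BY CounterID
+ORDER BY records DESC
+```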
+
+### CPU {#cpu}
+
+쿼리를 수행하려면 많은 행을 처리해야 하므로 별도의 행이 아닌 전체 벡터에 대한 모든 연산을 디스패치하거나 쿼리 엔진을 구현하여 디스패치 비용이 거의 들지 않습니다. 반쯤 괜찮은 디스크 하위 시스템에서 이렇게 하지 않으면 쿼리 인터프리터가 불가피하게 CPU를 정지시킵니다. 데이터를 열에 저장하고 가능한 경우 열별로 처리하는 것이 좋습니다.
+
+이를 수행하기위한 두가지 방법이 있습니다.
+
+1. 벡터 엔진. 모든 연산은 별도의 값 대신 벡터에 대해 작성됩니다. 즉, 작업을 자주 호출할 필요가 없으며 파견 비용도 무시할 수 있습니다. 작업 코드에는 최적화된 내부 주기가 포함되어 있습니다.
+2. 코드 생성. 쿼리에 대해 생성된 코드에는 모든 간접 호출이 있습니다.
+
+이것은 단순한 쿼리를 실행할 때 의미가 없기 때문에 "일반" 데이터베이스에서는 수행되지 않습니다. 그러나 예외가 있습니다. 예를 들어 MemSQL은 코드 생성을 사용하여 SQL 쿼리를 처리할 때 대기 시간을 줄입니다. (비교되게, 분석 DBMS는 대기 시간이 아닌 처리량 최적화가 필요합니다.)
+
+CPU 효율성을 위해 쿼리 언어는 선언적(SQL 또는 MDX)이거나 최소한 벡터(J, K)여야 합니다. 쿼리는 최적화를 허용하는 암시적 루프만 포함해야 합니다.
+
+{## [원문](https://clickhouse.com/docs/en/) ##}
diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md
index a1192b509df..48d92501f06 100644
--- a/docs/ru/development/build-osx.md
+++ b/docs/ru/development/build-osx.md
@@ -2,8 +2,13 @@
toc_priority: 65
toc_title: Сборка на Mac OS X
---
+
# Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
+!!! info "Вам не нужно собирать ClickHouse самостоятельно"
+ Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start).
+ Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`.
+
Сборка должна запускаться с x86_64 (Intel) на macOS версии 10.15 (Catalina) и выше в последней версии компилятора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компиляторах.
## Установка Homebrew {#install-homebrew}
diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
index 9cd8eda0b87..117223127c0 100644
--- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -99,13 +99,16 @@ patterns
``` text
pattern
+ rule_type
regexp
function
pattern
+ rule_type
regexp
age + precision
...
pattern
+ rule_type
regexp
function
age + precision
@@ -129,12 +132,20 @@ default
Поля для разделов `pattern` и `default`:
-- `regexp` – шаблон имени метрики.
+- `rule_type` - тип правила (применяется только к метрикам указанных типов), используется для разделения правил проверки плоских/теггированных метрик. Опциональное поле. Значение по умолчанию: `all`.
+Если используются метрики только одного типа или производительность проверки правил некритична, можно не использовать. По умолчанию создается только один тип правил для проверки. Иначе, если хотя бы для одного правила указано отличное от умолчания значение, создаются 2 независимых типа правил - для обычных (классические root.branch.leaf) и теггированных метрик (root.branch.leaf;tag1=value1).
+Правила по умолчанию попадают в оба правила обоих типов.
+Возможные значения:
+ - `all` (default) - универсальное правило, назначается также по умолчанию, если поле не задано
+ - `plain` - правило для плоских метрик (без тегов). Поле `regexp` обрабатывается как регулярное выражение.
+ - `tagged` - правило для теггированных метрик (метрика хранится в БД в формате `someName?tag1=value1&tag2=value2&tag3=value3`), регулярное выражение должно быть отсортированно по именам тегов, первым - значение тега `__name__`, если есть. Поле `regexp` обрабатывается как регулярное выражение.
+ - `tag_list` - правило для теггированных метрик, простой DSL для упрощения задания регулярного выражения в формате тегов graphite `someName;tag1=value1;tag2=value2`, `someName` или `tag1=value1;tag2=value2`. Поле `regexp` транслируется в правило `tagged`. Cортировать по именам тегов не обязательно, оно отсортируется автоматически. Значение тега (но не имя) может быть регулярным выражением (например `env=(dev|staging)`).
+- `regexp` – шаблон имени метрики (регулярное выражение или DSL).
- `age` – минимальный возраст данных в секундах.
- `precision` – точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
- `function` – имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
-### Пример конфигурации {#configuration-example}
+### Пример конфигурации без разделения типа правил {#configuration-example}
``` xml
@@ -169,6 +180,80 @@ default
```
+### Пример конфигурации c разделением типа правил {#configuration-typed-example}
+
+``` xml
+<graphite_rollup>
+    <version_column_name>Version</version_column_name>
+    <pattern>
+        <rule_type>plain</rule_type>
+        <regexp>click_cost</regexp>
+        <function>any</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tagged</rule_type>
+        <regexp>^((.*)|.)min\?</regexp>
+        <function>min</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tagged</rule_type>
+        <regexp><![CDATA[^someName\?(.*&)*tag1=value1(&|$)]]></regexp>
+        <function>min</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tag_list</rule_type>
+        <regexp>someName;tag2=value2</regexp>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <default>
+        <function>max</function>
+        <retention>
+            <age>0</age>
+            <precision>60</precision>
+        </retention>
+        <retention>
+            <age>3600</age>
+            <precision>300</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>3600</precision>
+        </retention>
+    </default>
+</graphite_rollup>
+```
+
!!! warning "Внимание"
Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index 4448372c522..3f140f85396 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -872,3 +872,13 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
Если диск сконфигурирован как `cold`, данные будут переноситься в S3 при срабатывании правил TTL или когда свободное место на локальном диске станет меньше порогового значения, которое определяется как `move_factor * disk_size`.
+
+## Виртуальные столбцы {#virtual-columns}
+
+- `_part` — Имя куска.
+- `_part_index` — Номер куска по порядку в результате запроса.
+- `_partition_id` — Имя партиции.
+- `_part_uuid` — Уникальный идентификатор куска (если включена MergeTree настройка `assign_part_uuids`).
+- `_partition_value` — Значения (кортеж) выражения `partition by`.
+- `_sample_factor` — Коэффициент сэмплирования (из запроса).
+
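+For example, a sketch of inspecting these virtual columns (the table name `my_table` is a placeholder):
+
+``` sql
+SELECT _part, _partition_id, count() AS cnt
+FROM my_table
+GROUP BY _part, _partition_id
+ORDER BY _part
+```
+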
diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md
index 0c1ae591ae3..10b4b9645a2 100644
--- a/docs/ru/engines/table-engines/special/buffer.md
+++ b/docs/ru/engines/table-engines/special/buffer.md
@@ -48,10 +48,8 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен.
То же самое происходит, если подчинённая таблица не существует в момент сброса буфера.
-Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а после создать таблицу Buffer заново.
-
!!! attention "Внимание"
- В релизах до 28 сентября 2020 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
+ В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index affa90d9840..5e4b7c6bcb7 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1736,6 +1736,48 @@ ClickHouse генерирует исключение:
Т.е. если `INSERT` в основную таблицу д.б. пропущен (сдедуплицирован), то автоматически не будет вставки и в материализованные представления. Это имплементировано для того, чтобы работали материализованные представления, которые сильно группируют данные основных `INSERT`, до такой степени что блоки вставляемые в материализованные представления получаются одинаковыми для разных `INSERT` в основную таблицу.
Одновременно это «ломает» идемпотентность вставки в материализованные представления. Т.е. если `INSERT` был успешен в основную таблицу и неуспешен в таблицу материализованного представления (напр. из-за сетевого сбоя при коммуникации с Zookeeper), клиент получит ошибку и попытается повторить `INSERT`. Но вставки в материализованные представления произведено не будет, потому что дедупликация сработает на основной таблице. Настройка `deduplicate_blocks_in_dependent_materialized_views` позволяет это изменить. Т.е. при повторном `INSERT` будет произведена дедупликация на таблице материализованного представления, и повторный инсерт вставит данные в таблицу материализованного представления, которые не удалось вставить из-за сбоя первого `INSERT`.
+## insert_deduplication_token {#insert_deduplication_token}
+
+Этот параметр позволяет пользователю указать собственную семантику дедупликации в MergeTree/ReplicatedMergeTree.
+Например, предоставляя уникальное значение параметра в каждом операторе INSERT,
+пользователь может избежать дедупликации одних и тех же вставленных данных.
+
+Возможные значения:
+
+- Любая строка
+
+Значение по умолчанию: пустая строка (выключено).
+
+`insert_deduplication_token` используется для дедупликации _только_ когда значение не пустое.
+
+Пример:
+
+```sql
+CREATE TABLE test_table
+( A Int64 )
+ENGINE = MergeTree
+ORDER BY A
+SETTINGS non_replicated_deduplication_window = 100;
+
+INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test' (1);
+
+-- следующая вставка не будет дедуплицирована, потому что insert_deduplication_token отличается
+INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test1' (1);
+
+-- следующая вставка будет дедуплицирована, потому что insert_deduplication_token
+-- тот же самый, что и один из предыдущих
+INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test' (2);
+
+SELECT * FROM test_table
+
+┌─A─┐
+│ 1 │
+└───┘
+┌─A─┐
+│ 1 │
+└───┘
+```
+
## count_distinct_implementation {#settings-count_distinct_implementation}
Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count).
@@ -2119,7 +2161,7 @@ ClickHouse генерирует исключение:
- 1 — включен режим параллельного разбора.
- 0 — отключен режим параллельного разбора.
-Значение по умолчанию: `0`.
+Значение по умолчанию: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
@@ -2130,7 +2172,7 @@ ClickHouse генерирует исключение:
- 1 — включен режим параллельного форматирования.
- 0 — отключен режим параллельного форматирования.
-Значение по умолчанию: `0`.
+Значение по умолчанию: `1`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md
index 4de2d067cce..fea4c00ac05 100644
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
## MATERIALIZE COLUMN {#materialize-column}
-Материализует столбец таблицы в кусках, в которых отсутствуют значения. Используется, если необходимо создать новый столбец со сложным материализованным выражением или выражением для заполнения по умолчанию (`DEFAULT`), потому как вычисление такого столбца прямо во время выполнения запроса `SELECT` оказывается ощутимо затратным. Чтобы совершить ту же операцию для существующего столбца, используйте модификатор `FINAL`.
+Материализует или обновляет столбец таблицы с выражением для значения по умолчанию (`DEFAULT` или `MATERIALIZED`).
+Используется, если необходимо добавить или обновить столбец со сложным выражением, потому как вычисление такого выражения прямо во время выполнения запроса `SELECT` оказывается ощутимо затратным.
Синтаксис:
```sql
-ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
+ALTER TABLE table MATERIALIZE COLUMN col;
```
**Пример**
@@ -211,21 +212,39 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
DROP TABLE IF EXISTS tmp;
SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
-INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
+INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
+SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x);
+
+┌─groupArray(x)─┬─groupArray(s)─────────┐
+│ [0,1,2,3,4] │ ['0','1','2','3','4'] │
+└───────────────┴───────────────────────┘
+
+ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x));
+
+INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5;
+
SELECT groupArray(x), groupArray(s) FROM tmp;
+
+┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐
+│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │
+└───────────────────────┴────────────────────────────────────────────────┘
+
+ALTER TABLE tmp MATERIALIZE COLUMN s;
+
+SELECT groupArray(x), groupArray(s) FROM tmp;
+
+┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐
+│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │
+└───────────────────────┴───────────────────────────────────────────────────────┘
```
-**Результат:**
+**Смотрите также**
-```sql
-┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐
-│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │
-└───────────────────────┴───────────────────────────────────────────┘
-```
+- [MATERIALIZED](../../statements/create/table.md#materialized).
## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter}
diff --git a/docs/ru/sql-reference/syntax.md b/docs/ru/sql-reference/syntax.md
index 6705b1068fe..7e9260915a8 100644
--- a/docs/ru/sql-reference/syntax.md
+++ b/docs/ru/sql-reference/syntax.md
@@ -28,7 +28,7 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
## Комментарии {#comments}
Поддерживаются комментарии в SQL-стиле и C-стиле.
-Комментарии в SQL-стиле: от `--` до конца строки. Пробел после `--` может не ставиться.
+Комментарии в SQL-стиле: от `--`, `#!` или `# ` до конца строки. Пробел после `--` и `#!` может не ставиться.
Комментарии в C-стиле: от `/*` до `*/`. Такие комментарии могут быть многострочными. Пробелы тоже не обязательны.
## Ключевые слова {#syntax-keywords}
@@ -104,9 +104,9 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
### Heredoc {#heredeoc}
-Синтаксис [heredoc](https://ru.wikipedia.org/wiki/Heredoc-синтаксис) — это способ определения строк с сохранением исходного формата (часто с переносом строки). `Heredoc` задается как произвольный строковый литерал между двумя символами `$`, например `$heredoc$`. Значение между двумя `heredoc` обрабатывается "как есть".
+Синтаксис [heredoc](https://ru.wikipedia.org/wiki/Heredoc-синтаксис) — это способ определения строк с сохранением исходного формата (часто с переносом строки). `Heredoc` задается как произвольный строковый литерал между двумя символами `$`, например `$heredoc$`. Значение между двумя `heredoc` обрабатывается "как есть".
-Синтаксис `heredoc` часто используют для вставки кусков кода SQL, HTML, XML и т.п.
+Синтаксис `heredoc` часто используют для вставки кусков кода SQL, HTML, XML и т.п.
**Пример**
diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py
index cf41e2b78c2..3d32ba30a21 100644
--- a/docs/tools/single_page.py
+++ b/docs/tools/single_page.py
@@ -90,7 +90,10 @@ def concatenate(lang, docs_path, single_page_file, nav):
line)
# If failed to replace the relative link, print to log
- if '../' in line:
+ # But with some exceptions:
+ # - "../src/" -- for cmake-in-clickhouse.md (link to sources)
+ # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
+ if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
logging.info('Failed to resolve relative link:')
logging.info(path)
logging.info(line)
diff --git a/docs/zh/engines/database-engines/index.md b/docs/zh/engines/database-engines/index.md
index 10be2e0f041..0d844365fbb 100644
--- a/docs/zh/engines/database-engines/index.md
+++ b/docs/zh/engines/database-engines/index.md
@@ -14,7 +14,7 @@ toc_title: Introduction
- [MySQL](../../engines/database-engines/mysql.md)
-- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
+- [MaterializeMySQL](../../engines/database-engines/materialized-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)
@@ -26,4 +26,6 @@ toc_title: Introduction
- [Replicated](../../engines/database-engines/replicated.md)
+- [SQLite](../../engines/database-engines/sqlite.md)
+
[来源文章](https://clickhouse.com/docs/en/database_engines/)
diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md
deleted file mode 120000
index 02118b85df4..00000000000
--- a/docs/zh/engines/database-engines/materialized-mysql.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/engines/database-engines/materialized-mysql.md
\ No newline at end of file
diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md
new file mode 100644
index 00000000000..f654013494a
--- /dev/null
+++ b/docs/zh/engines/database-engines/materialized-mysql.md
@@ -0,0 +1,274 @@
+---
+toc_priority: 29
+toc_title: MaterializedMySQL
+---
+
+# [experimental] MaterializedMySQL {#materialized-mysql}
+
+!!! warning "警告"
+ 这是一个实验性的特性,不应该在生产中使用.
+
+
+创建ClickHouse数据库,包含MySQL中所有的表,以及这些表中的所有数据。
+
+ClickHouse服务器作为MySQL副本工作。它读取binlog并执行DDL和DML查询。
+
+## 创建数据库 {#creating-a-database}
+
+``` sql
+CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
+ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
+[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)]
+```
+
+**引擎参数**
+
+- `host:port` — MySQL 服务地址.
+- `database` — MySQL 数据库名称.
+- `user` — MySQL 用户名.
+- `password` — MySQL 用户密码.
+
+**引擎配置**
+
+
+- `max_rows_in_buffer` — 允许在内存中缓存数据的最大行数(对于单个表和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值:`65 505`。
+- `max_bytes_in_buffer` - 允许在内存中缓存数据的最大字节数(对于单个表和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `1 048 576`。
+- `max_rows_in_buffers` - 允许在内存中缓存数据的最大行数(用于数据库和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `65 505`。
+- `max_bytes_in_buffers` - 允许在内存中缓存数据的最大字节数(用于数据库和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `1 048 576`。
+- `max_flush_data_time` - 允许数据在内存中缓存的最大毫秒数(对于数据库和无法查询的缓存数据)。当超过这个时间,数据将被物化。默认值: `1000`。
+- `max_wait_time_when_mysql_unavailable` - MySQL不可用时的重试间隔(毫秒)。负值禁用重试。默认值:`1000`。
+- `allows_query_when_mysql_lost` - 允许在MySQL丢失时查询物化表。默认值:`0`(`false`)。
+
+```sql
+CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
+ SETTINGS
+ allows_query_when_mysql_lost=true,
+ max_wait_time_when_mysql_unavailable=10000;
+```
+
+**MySQL服务器端配置**
+
+为了`MaterializedMySQL`的正确工作,有一些必须设置的`MySQL`端配置设置:
+
+- `default_authentication_plugin = mysql_native_password `,因为 `MaterializedMySQL` 只能授权使用该方法。
+- `gtid_mode = on`,因为基于GTID的日志记录是提供正确的 `MaterializedMySQL`复制的强制要求。
+
+!!! attention "注意"
+ 当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。
+
+## 虚拟列 {#virtual-columns}
+
+当使用`MaterializeMySQL`数据库引擎时,[ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md)表与虚拟的`_sign`和`_version`列一起使用。
+
+- `_version` — 事务版本. 类型 [UInt64](../../sql-reference/data-types/int-uint.md).
+- `_sign` — 删除标记. 类型 [Int8](../../sql-reference/data-types/int-uint.md). 可能的值:
+ - `1` — 行没有删除,
+ - `-1` — 行已被删除.
+
+## 支持的数据类型 {#data_types-support}
+
+| MySQL | ClickHouse |
+|-------------------------|--------------------------------------------------------------|
+| TINY | [Int8](../../sql-reference/data-types/int-uint.md) |
+| SHORT | [Int16](../../sql-reference/data-types/int-uint.md) |
+| INT24 | [Int32](../../sql-reference/data-types/int-uint.md) |
+| LONG | [UInt32](../../sql-reference/data-types/int-uint.md) |
+| LONGLONG | [UInt64](../../sql-reference/data-types/int-uint.md) |
+| FLOAT | [Float32](../../sql-reference/data-types/float.md) |
+| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
+| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) |
+| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
+| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
+| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
+| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) |
+| TIME | [Int64](../../sql-reference/data-types/int-uint.md) |
+| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
+| STRING | [String](../../sql-reference/data-types/string.md) |
+| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
+| BLOB | [String](../../sql-reference/data-types/string.md) |
+| GEOMETRY | [String](../../sql-reference/data-types/string.md) |
+| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
+| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) |
+| SET | [UInt64](../../sql-reference/data-types/int-uint.md) |
+
+[Nullable](../../sql-reference/data-types/nullable.md) 已经被支持.
+
+MySQL中的Time 类型,会被ClickHouse转换成微秒来存储
+
+不支持其他类型。如果MySQL表包含此类类型的列,ClickHouse抛出异常"Unhandled data type"并停止复制。
+
+## 规范和推荐用法 {#specifics-and-recommendations}
+
+### 兼容性限制 {#compatibility-restrictions}
+
+除了数据类型的限制之外,还有一些限制与`MySQL`数据库相比有所不同,这应该在复制之前解决:
+
+- `MySQL` 中的每个表都应该包含 `PRIMARY KEY`。
+- 对于表的复制,那些包含 `ENUM` 字段值超出范围的行(在 `ENUM` 签名中指定)将不起作用。
+
+### DDL Queries {#ddl-queries}
+
+MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). 如果ClickHouse 无法解析某些语句DDL 操作,则会跳过。
+
+
+### 数据复制 {#data-replication}
+
+MaterializedMySQL不支持直接的 `INSERT`, `DELETE` 和 `UPDATE` 查询。然而,它们在数据复制方面得到了支持:
+
+- MySQL `INSERT`查询被转换为`_sign=1`的INSERT查询。
+- MySQL `DELETE`查询被转换为`INSERT`,并且`_sign=-1`。
+- 如果主键被修改了,MySQL的 `UPDATE` 查询将被转换为带 `_sign=1` 的 `INSERT` 和带 `_sign=-1` 的 `INSERT`;如果主键没有被修改,则转换为带 `_sign=1` 的 `INSERT`。
+
+### MaterializedMySQL 数据表查询 {#select}
+
+`SELECT` 查询从 `MaterializedMySQL`表有一些细节:
+
+ - 如果在SELECT查询中没有指定 `_version`,则使用 [FINAL](../../sql-reference/statements/select/from.md#select-from-final) 修饰符,所以只有带有 `MAX(_version)` 的行会返回每个主键值。
+
+ - 如果在SELECT查询中没有指定 `_sign`,则默认使用 `WHERE _sign=1`,所以被删除的行不会包含在结果集中。
+
+ - 结果包括列注释,以防MySQL数据库表中存在这些列注释。
+
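+For example, a sketch of both query styles (using the `mysql.test` table from the usage example at the end of this page):
+
+``` sql
+-- FINAL and WHERE _sign = 1 are applied implicitly
+SELECT * FROM mysql.test;
+
+-- the virtual columns can also be read explicitly
+SELECT *, _sign, _version FROM mysql.test WHERE _sign = 1;
+```
+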
+### 索引转换 {#index-conversion}
+
+在ClickHouse表中,MySQL的 `PRIMARY KEY` 和 `INDEX` 子句被转换为 `ORDER BY` 元组。
+
+ClickHouse只有一个物理排序,由 `order by` 条件决定。要创建一个新的物理排序,请使用[materialized views](../../sql-reference/statements/create/view.md#materialized)。
+
+**注意**
+
+- `_sign=-1` 的行不会被物理地从表中删除。
+- 级联 `UPDATE/DELETE` 查询不支持 `MaterializedMySQL` 引擎,因为它们在 MySQL binlog 中是不可见的。
+- 复制很容易被破坏。
+- 禁止对数据库和表进行手工操作。
+- `MaterializedMySQL` 受[optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert)设置的影响。当MySQL服务器中的一个表发生变化时,数据会合并到 `MaterializedMySQL` 数据库中相应的表中。
+
+### 表重写 {#table-overrides}
+
+表覆盖可用于自定义ClickHouse DDL查询,从而允许您对应用程序进行模式优化。这对于控制分区特别有用,分区对MaterializedMySQL的整体性能非常重要。
+
+这些是你可以对MaterializedMySQL表重写的模式转换操作:
+
+ * 修改列类型。必须与原始类型兼容,否则复制将失败。例如,可以将`UInt32`列修改为`UInt64`,不能将 `String` 列修改为 `Array(String)`。
+ * 修改 [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl).
+ * 修改 [column compression codec](../../sql-reference/statements/create/table/#codecs).
+ * 增加 [ALIAS columns](../../sql-reference/statements/create/table/#alias).
+ * 增加 [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
+ * 增加 [projections](../table-engines/mergetree-family/mergetree/#projections).
+ 请注意,当使用 `SELECT ... FINAL` (MaterializedMySQL默认是这样做的) 时,投影优化是被禁用的,所以这里是受限的,`INDEX ... TYPE hypothesis` [在v21.12的博客文章中描述](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/) 可能在这种情况下更有用。
+ * 修改 [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/)
+ * 修改 [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * 修改 [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * 增加 [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * 增加 [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+
+```sql
+CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
+[SETTINGS ...]
+[TABLE OVERRIDE table_name (
+ [COLUMNS (
+ [col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...]
+ [INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...]
+ [PROJECTION projection_name (SELECT [GROUP BY] [ORDER BY]), ...]
+ )]
+ [ORDER BY expr]
+ [PRIMARY KEY expr]
+ [PARTITION BY expr]
+ [SAMPLE BY expr]
+ [TTL expr]
+), ...]
+```
+
+示例:
+
+```sql
+CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
+TABLE OVERRIDE table1 (
+ COLUMNS (
+ userid UUID,
+ category LowCardinality(String),
+ timestamp DateTime CODEC(Delta, Default)
+ )
+ PARTITION BY toYear(timestamp)
+),
+TABLE OVERRIDE table2 (
+ COLUMNS (
+ client_ip String TTL created + INTERVAL 72 HOUR
+ )
+ SAMPLE BY ip_hash
+)
+```
+
+
+`COLUMNS`列表是稀疏的;根据指定修改现有列,添加额外的ALIAS列。不可能添加普通列或实体化列。具有不同类型的已修改列必须可从原始类型赋值。在执行`CREATE DATABASE` 查询时,目前还没有验证这个或类似的问题,因此需要格外小心。
+
+您可以为还不存在的表指定重写。
+
+!!! warning "警告"
+ 如果使用时不小心,很容易用表重写中断复制。例如:
+
+ * 如果一个ALIAS列被添加了一个表覆盖,并且一个具有相同名称的列后来被添加到源MySQL表,在ClickHouse中转换后的ALTER table查询将失败并停止复制。
+ * 目前可以添加引用可空列的覆盖,而非空列是必需的,例如 `ORDER BY` 或 `PARTITION BY`。这将导致CREATE TABLE查询失败,也会导致复制停止。
+
+## 使用示例 {#examples-of-use}
+
+ MySQL 查询语句:
+
+``` sql
+mysql> CREATE DATABASE db;
+mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT);
+mysql> INSERT INTO db.test VALUES (1, 11), (2, 22);
+mysql> DELETE FROM db.test WHERE a=1;
+mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16);
+mysql> UPDATE db.test SET c='Wow!', b=222;
+mysql> SELECT * FROM test;
+```
+
+```text
+┌─a─┬───b─┬─c────┐
+│ 2 │ 222 │ Wow! │
+└───┴─────┴──────┘
+```
+
+ClickHouse中的数据库,与MySQL服务器交换数据:
+
+创建的数据库和表:
+
+``` sql
+CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
+SHOW TABLES FROM mysql;
+```
+
+``` text
+┌─name─┐
+│ test │
+└──────┘
+```
+
+数据插入之后:
+
+``` sql
+SELECT * FROM mysql.test;
+```
+
+``` text
+┌─a─┬──b─┐
+│ 1 │ 11 │
+│ 2 │ 22 │
+└───┴────┘
+```
+
+删除数据后,添加列并更新:
+
+``` sql
+SELECT * FROM mysql.test;
+```
+
+``` text
+┌─a─┬───b─┬─c────┐
+│ 2 │ 222 │ Wow! │
+└───┴─────┴──────┘
+```
+
+[来源文章](https://clickhouse.com/docs/en/engines/database-engines/materialized-mysql/)
diff --git a/docs/zh/engines/database-engines/postgresql.md b/docs/zh/engines/database-engines/postgresql.md
index 12b8133f404..4d2af9182f9 100644
--- a/docs/zh/engines/database-engines/postgresql.md
+++ b/docs/zh/engines/database-engines/postgresql.md
@@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — 远程数据库名次
- `user` — PostgreSQL用户名称
- `password` — PostgreSQL用户密码
+- `schema` - PostgreSQL 模式
- `use_table_cache` — 定义数据库表结构是否已缓存或不进行。可选的。默认值: `0`.
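+
+For example, a sketch of creating a database that points at a specific PostgreSQL schema (the host, credentials and `my_schema` are placeholders):
+
+``` sql
+CREATE DATABASE postgres_db
+ENGINE = PostgreSQL('postgres_host:5432', 'postgres_database', 'user', 'password', 'my_schema');
+```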
## 支持的数据类型 {#data_types-support}
diff --git a/docs/zh/engines/database-engines/replicated.md b/docs/zh/engines/database-engines/replicated.md
index 9ffebe04571..bd5841491dd 100644
--- a/docs/zh/engines/database-engines/replicated.md
+++ b/docs/zh/engines/database-engines/replicated.md
@@ -31,6 +31,7 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
当创建数据库的新副本时,该副本会自己创建表。如果副本已经不可用很长一段时间,并且已经滞后于复制日志-它用ZooKeeper中的当前元数据检查它的本地元数据,将带有数据的额外表移动到一个单独的非复制数据库(以免意外地删除任何多余的东西),创建缺失的表,如果表名已经被重命名,则更新表名。数据在`ReplicatedMergeTree`级别被复制,也就是说,如果表没有被复制,数据将不会被复制(数据库只负责元数据)。
+允许[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md)查询,但不允许复制。数据库引擎将只向当前副本添加/获取/删除分区/部件。但是,如果表本身使用了Replicated表引擎,那么数据将在使用`ATTACH`后被复制。
## 使用示例 {#usage-example}
创建三台主机的集群:
diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md
deleted file mode 120000
index 776734647c2..00000000000
--- a/docs/zh/engines/database-engines/sqlite.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/engines/database-engines/sqlite.md
\ No newline at end of file
diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md
new file mode 100644
index 00000000000..48dca38e4af
--- /dev/null
+++ b/docs/zh/engines/database-engines/sqlite.md
@@ -0,0 +1,80 @@
+---
+toc_priority: 32
+toc_title: SQLite
+---
+
+# SQLite {#sqlite}
+
+允许连接到[SQLite](https://www.sqlite.org/index.html)数据库,并支持ClickHouse和SQLite交换数据, 执行 `INSERT` 和 `SELECT` 查询。
+
+## 创建一个数据库 {#creating-a-database}
+
+``` sql
+ CREATE DATABASE sqlite_database
+ ENGINE = SQLite('db_path')
+```
+
+**引擎参数**
+
+- `db_path` — SQLite 数据库文件的路径.
+
+## 数据类型的支持 {#data_types-support}
+
+| SQLite | ClickHouse |
+|---------------|---------------------------------------------------------|
+| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
+| REAL | [Float32](../../sql-reference/data-types/float.md) |
+| TEXT | [String](../../sql-reference/data-types/string.md) |
+| BLOB | [String](../../sql-reference/data-types/string.md) |
+
+## 技术细节和建议 {#specifics-and-recommendations}
+
+SQLite将整个数据库(定义、表、索引和数据本身)存储为主机上的单个跨平台文件。在写入过程中,SQLite会锁定整个数据库文件,因此写入操作是顺序执行的。读操作可以是多任务的。
+SQLite不需要服务管理(如启动脚本)或基于`GRANT`和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。
+
+## 使用示例 {#usage-example}
+
+数据库在ClickHouse,连接到SQLite:
+
+``` sql
+CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
+SHOW TABLES FROM sqlite_db;
+```
+
+``` text
+┌──name───┐
+│ table1 │
+│ table2 │
+└─────────┘
+```
+
+展示数据表中的内容:
+
+``` sql
+SELECT * FROM sqlite_db.table1;
+```
+
+``` text
+┌─col1──┬─col2─┐
+│ line1 │ 1 │
+│ line2 │ 2 │
+│ line3 │ 3 │
+└───────┴──────┘
+```
+从ClickHouse表插入数据到SQLite表:
+
+``` sql
+CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
+INSERT INTO clickhouse_table VALUES ('text',10);
+INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
+SELECT * FROM sqlite_db.table1;
+```
+
+``` text
+┌─col1──┬─col2─┐
+│ line1 │ 1 │
+│ line2 │ 2 │
+│ line3 │ 3 │
+│ text │ 10 │
+└───────┴──────┘
+```
diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md
new file mode 100644
index 00000000000..aa2c82d902a
--- /dev/null
+++ b/docs/zh/engines/table-engines/integrations/hive.md
@@ -0,0 +1,416 @@
+---
+toc_priority: 4
+toc_title: Hive
+---
+
+# Hive {#hive}
+
+Hive引擎允许对HDFS Hive表执行 `SELECT` 查询。目前它支持如下输入格式:
+
+- 文本:只支持简单的标量列类型,除了 `Binary`
+
+- ORC:支持简单的标量列类型,除了`char`; 只支持 `array` 这样的复杂类型
+
+- Parquet:支持所有简单标量列类型;只支持 `array` 这样的复杂类型
+
+## 创建表 {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+ name1 [type1] [ALIAS expr1],
+ name2 [type2] [ALIAS expr2],
+ ...
+) ENGINE = Hive('thrift://host:port', 'database', 'table');
+PARTITION BY expr
+```
+查看[CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query)查询的详细描述。
+
+表的结构可以与原来的Hive表结构有所不同:
+- 列名应该与原来的Hive表相同,但你可以使用这些列中的一些,并以任何顺序,你也可以使用一些从其他列计算的别名列。
+- 列类型与原Hive表的列类型保持一致。
+- “Partition by expression”应与原Hive表保持一致,“Partition by expression”中的列应在表结构中。
+
+**引擎参数**
+
+- `thrift://host:port` — Hive Metastore 地址
+
+- `database` — 远程数据库名.
+
+- `table` — 远程数据表名.
+
+## 使用示例 {#usage-example}
+
+### 如何使用HDFS文件系统的本地缓存
+我们强烈建议您为远程文件系统启用本地缓存。基准测试显示,如果使用缓存,它的速度会快两倍。
+
+在使用缓存之前,请将其添加到 `config.xml`
+``` xml
+<local_cache_for_remote_fs>
+    <enable>true</enable>
+    <root_dir>local_cache</root_dir>
+    <limit_size>559096952</limit_size>
+    <bytes_read_before_flush>1048576</bytes_read_before_flush>
+</local_cache_for_remote_fs>
+```
+
+
+- enable: 开启后,ClickHouse将为HDFS (远程文件系统)维护本地缓存。
+- root_dir: 必需的。用于存储远程文件系统的本地缓存文件的根目录。
+- limit_size: 必需的。本地缓存文件的最大大小(单位为字节)。
+- bytes_read_before_flush: 从远程文件系统下载文件时,刷新到本地文件系统前的控制字节数。缺省值为1MB。
+
+当ClickHouse为远程文件系统启用了本地缓存时,用户仍然可以选择不使用缓存,并在查询中设置 `use_local_cache_for_remote_fs = 0`。`use_local_cache_for_remote_fs` 默认为 `false`。
+
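+For example, a sketch of bypassing the local cache for a single query (using the `test.test_orc` table created in the examples below):
+
+``` sql
+SELECT * FROM test.test_orc SETTINGS use_local_cache_for_remote_fs = 0;
+```
+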
+### 查询 ORC 输入格式的Hive 表
+
+#### 在 Hive 中建表
+``` text
+hive > CREATE TABLE `test`.`test_orc`(
+ `f_tinyint` tinyint,
+ `f_smallint` smallint,
+ `f_int` int,
+ `f_integer` int,
+ `f_bigint` bigint,
+ `f_float` float,
+ `f_double` double,
+ `f_decimal` decimal(10,0),
+ `f_timestamp` timestamp,
+ `f_date` date,
+ `f_string` string,
+ `f_varchar` varchar(100),
+ `f_bool` boolean,
+ `f_binary` binary,
+ `f_array_int` array<int>,
+ `f_array_string` array<string>,
+ `f_array_float` array<float>,
+ `f_array_array_int` array<array<int>>,
+ `f_array_array_string` array<array<string>>,
+ `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+ `day` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+ 'hdfs://testcluster/data/hive/test.db/test_orc'
+
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_orc;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.295 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+ClickHouse中的表,从上面创建的Hive表中获取数据:
+
+``` sql
+CREATE TABLE test.test_orc
+(
+ `f_tinyint` Int8,
+ `f_smallint` Int16,
+ `f_int` Int32,
+ `f_integer` Int32,
+ `f_bigint` Int64,
+ `f_float` Float32,
+ `f_double` Float64,
+ `f_decimal` Float64,
+ `f_timestamp` DateTime,
+ `f_date` Date,
+ `f_string` String,
+ `f_varchar` String,
+ `f_bool` Bool,
+ `f_binary` String,
+ `f_array_int` Array(Int32),
+ `f_array_string` Array(String),
+ `f_array_float` Array(Float32),
+ `f_array_array_int` Array(Array(Int32)),
+ `f_array_array_string` Array(Array(String)),
+ `f_array_array_float` Array(Array(Float32)),
+ `day` String
+)
+ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+PARTITION BY day
+
+```
+
+``` sql
+SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test.test_orc
+SETTINGS input_format_orc_allow_missing_columns = 1
+
+Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-04 04:00:44
+f_date: 2021-12-03
+f_string: hello world
+f_varchar: hello world
+f_bool: true
+f_binary: hello world
+f_array_int: [1,2,3]
+f_array_string: ['hello world','hello world']
+f_array_float: [1.1,1.2]
+f_array_array_int: [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float: [[1.11,2.22],[3.33,4.44]]
+day: 2021-09-18
+
+
+1 rows in set. Elapsed: 0.078 sec.
+```
+
+### 查询 Parquet 输入格式的 Hive 表
+
+#### 在 Hive 中建表
+``` text
+hive >
+CREATE TABLE `test`.`test_parquet`(
+ `f_tinyint` tinyint,
+ `f_smallint` smallint,
+ `f_int` int,
+ `f_integer` int,
+ `f_bigint` bigint,
+ `f_float` float,
+ `f_double` double,
+ `f_decimal` decimal(10,0),
+ `f_timestamp` timestamp,
+ `f_date` date,
+ `f_string` string,
+ `f_varchar` varchar(100),
+ `f_char` char(100),
+ `f_bool` boolean,
+ `f_binary` binary,
+ `f_array_int` array<int>,
+ `f_array_string` array<string>,
+ `f_array_float` array<float>,
+ `f_array_array_int` array<array<int>>,
+ `f_array_array_string` array<array<string>>,
+ `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+ `day` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+ 'hdfs://testcluster/data/hive/test.db/test_parquet'
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_parquet;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.766 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+ClickHouse 中的表, 从上面创建的Hive表中获取数据:
+
+``` sql
+CREATE TABLE test.test_parquet
+(
+ `f_tinyint` Int8,
+ `f_smallint` Int16,
+ `f_int` Int32,
+ `f_integer` Int32,
+ `f_bigint` Int64,
+ `f_float` Float32,
+ `f_double` Float64,
+ `f_decimal` Float64,
+ `f_timestamp` DateTime,
+ `f_date` Date,
+ `f_string` String,
+ `f_varchar` String,
+ `f_char` String,
+ `f_bool` Bool,
+ `f_binary` String,
+ `f_array_int` Array(Int32),
+ `f_array_string` Array(String),
+ `f_array_float` Array(Float32),
+ `f_array_array_int` Array(Array(Int32)),
+ `f_array_array_string` Array(Array(String)),
+ `f_array_array_float` Array(Array(Float32)),
+ `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test_parquet
+SETTINGS input_format_parquet_allow_missing_columns = 1
+
+Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-14 17:54:56
+f_date: 2021-12-14
+f_string: hello world
+f_varchar: hello world
+f_char: hello world
+f_bool: true
+f_binary: hello world
+f_array_int: [1,2,3]
+f_array_string: ['hello world','hello world']
+f_array_float: [1.1,1.2]
+f_array_array_int: [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float: [[1.11,2.22],[3.33,4.44]]
+day: 2021-09-18
+
+1 rows in set. Elapsed: 0.357 sec.
+```
+
+### 查询文本输入格式的Hive表
+
+#### 在Hive 中建表
+
+``` text
+hive >
+CREATE TABLE `test`.`test_text`(
+ `f_tinyint` tinyint,
+ `f_smallint` smallint,
+ `f_int` int,
+ `f_integer` int,
+ `f_bigint` bigint,
+ `f_float` float,
+ `f_double` double,
+ `f_decimal` decimal(10,0),
+ `f_timestamp` timestamp,
+ `f_date` date,
+ `f_string` string,
+ `f_varchar` varchar(100),
+ `f_char` char(100),
+ `f_bool` boolean,
+ `f_binary` binary,
+ `f_array_int` array<int>,
+ `f_array_string` array<string>,
+ `f_array_float` array<float>,
+ `f_array_array_int` array<array<int>>,
+ `f_array_array_string` array<array<string>>,
+ `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+ `day` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ 'hdfs://testcluster/data/hive/test.db/test_text'
+Time taken: 0.1 seconds, Fetched: 34 row(s)
+
+
+hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_text;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.624 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+
+ClickHouse中的表, 从上面创建的Hive表中获取数据:
+``` sql
+CREATE TABLE test.test_text
+(
+ `f_tinyint` Int8,
+ `f_smallint` Int16,
+ `f_int` Int32,
+ `f_integer` Int32,
+ `f_bigint` Int64,
+ `f_float` Float32,
+ `f_double` Float64,
+ `f_decimal` Float64,
+ `f_timestamp` DateTime,
+ `f_date` Date,
+ `f_string` String,
+ `f_varchar` String,
+ `f_char` String,
+ `f_bool` Bool,
+ `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
+```
+
+``` text
+SELECT *
+FROM test.test_text
+SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
+
+Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-14 18:11:17
+f_date: 2021-12-14
+f_string: hello world
+f_varchar: hello world
+f_char: hello world
+f_bool: true
+day: 2021-09-18
+```
diff --git a/docs/zh/engines/table-engines/integrations/index.md b/docs/zh/engines/table-engines/integrations/index.md
index 0c34ae078a0..5ed4a555f9c 100644
--- a/docs/zh/engines/table-engines/integrations/index.md
+++ b/docs/zh/engines/table-engines/integrations/index.md
@@ -19,3 +19,5 @@ ClickHouse 提供了多种方式来与外部系统集成,包括表引擎。像
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
+- [SQLite](../../../engines/table-engines/integrations/sqlite.md)
+- [Hive](../../../engines/table-engines/integrations/hive.md)
diff --git a/docs/zh/faq/general/dbms-naming.md b/docs/zh/faq/general/dbms-naming.md
deleted file mode 120000
index 0df856af0ca..00000000000
--- a/docs/zh/faq/general/dbms-naming.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/dbms-naming.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/dbms-naming.md b/docs/zh/faq/general/dbms-naming.md
new file mode 100644
index 00000000000..8d4353f9322
--- /dev/null
+++ b/docs/zh/faq/general/dbms-naming.md
@@ -0,0 +1,17 @@
+---
+title: "\u201CClickHouse\u201D 有什么含义?"
+toc_hidden: true
+toc_priority: 10
+---
+
+# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
+
+它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上,ClickHouse本应该保存人们在互联网上的所有点击记录,现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
+
+这个由两部分组成的意思有两个结果:
+
+- 唯一正确的写"Click**H**ouse"的方式是用大写H。
+- 如果需要缩写,请使用“**CH**”。由于一些历史原因,缩写CK在中国也很流行,主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
+
+!!! info "有趣的事实"
+ 多年以后,这种将两个各有含义的词组合起来的命名方法,在卡内基梅隆大学数据库副教授 [Andy Pavlo 的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 中被评为最好的数据库命名方式,ClickHouse 与 Postgres 共同获得"史上最佳数据库名"奖。
diff --git a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
deleted file mode 120000
index 5ac9a615386..00000000000
--- a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
new file mode 100644
index 00000000000..39d2d639229
--- /dev/null
+++ b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
@@ -0,0 +1,17 @@
+---
+title: 我如何为ClickHouse贡献代码?
+toc_hidden: true
+toc_priority: 120
+---
+
+# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
+
+ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
+
+按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
+
+如果你想对ClickHouse提出实质性的改变建议,可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
+
+如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。
+
+
diff --git a/docs/zh/faq/index.md b/docs/zh/faq/index.md
index 9887d2c6c0a..1ba1b792fbd 100644
--- a/docs/zh/faq/index.md
+++ b/docs/zh/faq/index.md
@@ -26,6 +26,7 @@ toc_priority: 76
- **[运维操作](../faq/operations/index.md)**
- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../faq/operations/production.md)
- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../faq/operations/delete-old-data.md)
+ - [ClickHouse支持多区域复制吗?](../faq/operations/multi-region-replication.md)
- **[集成开发](../faq/integration/index.md)**
- [如何从 ClickHouse 导出数据到一个文件?](../faq/integration/file-export.md)
- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../faq/integration/oracle-odbc.md)
diff --git a/docs/zh/faq/integration/index.md b/docs/zh/faq/integration/index.md
deleted file mode 120000
index 8323d6218a3..00000000000
--- a/docs/zh/faq/integration/index.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/integration/index.md
\ No newline at end of file
diff --git a/docs/zh/faq/integration/index.md b/docs/zh/faq/integration/index.md
new file mode 100644
index 00000000000..2bfd728ec8c
--- /dev/null
+++ b/docs/zh/faq/integration/index.md
@@ -0,0 +1,21 @@
+---
+title: 关于集成ClickHouse和其他系统的问题
+toc_hidden_folder: true
+toc_priority: 4
+toc_title: Integration
+---
+
+# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems}
+
+问题:
+
+- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md)
+- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md)
+- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md)
+
+
+
+!!! info "没看到你要找的东西吗?"
+ 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
+
+{## [原文](https://clickhouse.com/docs/en/faq/integration/) ##}
\ No newline at end of file
diff --git a/docs/zh/faq/operations/index.md b/docs/zh/faq/operations/index.md
deleted file mode 120000
index fd141164fdc..00000000000
--- a/docs/zh/faq/operations/index.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/operations/index.md
\ No newline at end of file
diff --git a/docs/zh/faq/operations/index.md b/docs/zh/faq/operations/index.md
new file mode 100644
index 00000000000..cdf4b9622ec
--- /dev/null
+++ b/docs/zh/faq/operations/index.md
@@ -0,0 +1,20 @@
+---
+title: 关于操作ClickHouse服务器和集群的问题
+toc_hidden_folder: true
+toc_priority: 3
+toc_title: Operations
+---
+
+# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters}
+
+问题:
+
+- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md)
+- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md)
+- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md)
+
+
+!!! info "没看到你要找的东西吗?"
+ 查看[其他FAQ类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
+
+{## [原文](https://clickhouse.com/docs/en/faq/production/) ##}
diff --git a/docs/zh/faq/operations/multi-region-replication.md b/docs/zh/faq/operations/multi-region-replication.md
deleted file mode 120000
index dbc985ee1fb..00000000000
--- a/docs/zh/faq/operations/multi-region-replication.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/operations/multi-region-replication.md
\ No newline at end of file
diff --git a/docs/zh/faq/operations/multi-region-replication.md b/docs/zh/faq/operations/multi-region-replication.md
new file mode 100644
index 00000000000..f5ab147bde6
--- /dev/null
+++ b/docs/zh/faq/operations/multi-region-replication.md
@@ -0,0 +1,14 @@
+---
+title: ClickHouse支持多区域复制吗?
+toc_hidden: true
+toc_priority: 30
+---
+
+# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication}
+
+简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数(毫秒)范围内,否则写操作在经过分布式共识协议时性能将受到影响。例如,美国两岸之间的复制可以正常工作,但美国和欧洲之间则不行。
+
+在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。
+
+更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。
+
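As an illustration of "same configuration as single-region replication, just replica hosts in different locations", the usual ReplicatedMergeTree table can be created on every replica regardless of region. This is only a sketch: the table name and columns below are hypothetical, and it assumes the `{shard}`/`{replica}` macros and a ZooKeeper or ClickHouse Keeper ensemble are already configured.

``` bash
# Run the same statement on each replica host; only the host locations differ
clickhouse-client --query "
    CREATE TABLE events_replicated
    (
        event_date Date,
        event_id   UInt64
    )
    ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events_replicated', '{replica}')
    ORDER BY (event_date, event_id)
"
```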
diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md
index 738b0365f46..16f51eac9a8 100644
--- a/docs/zh/interfaces/http.md
+++ b/docs/zh/interfaces/http.md
@@ -18,6 +18,17 @@ $ curl 'http://localhost:8123/'
Ok.
```
+Web UI 可以通过这个地址访问:`http://localhost:8123/play`。
+在运行状况检查脚本中,请使用 `GET /ping` 请求。该处理程序总是返回 "Ok."(以换行结尾),自 18.12.13 版本起可用。要检查副本的延迟,请参见 `/replicas_status`。
+
+
+``` bash
+$ curl 'http://localhost:8123/ping'
+Ok.
+$ curl 'http://localhost:8123/replicas_status'
+Ok.
+```
+
通过URL中的 `query` 参数来发送请求,或者发送POST请求,或者将查询的开头部分放在URL的`query`参数中,其他部分放在POST中(我们会在后面解释为什么这样做是有必要的)。URL的大小会限制在16KB,所以发送大型查询时要时刻记住这点。
如果请求成功,将会收到200的响应状态码和响应主体中的结果。
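To make the three ways of submitting a query concrete, here is a minimal sketch with curl, assuming a default server listening on localhost:8123; `SELECT 1` is just a placeholder query.

``` bash
# Query entirely in the `query` URL parameter (URL-encoded)
curl 'http://localhost:8123/?query=SELECT%201'

# Query entirely in the POST body
echo 'SELECT 1' | curl 'http://localhost:8123/' --data-binary @-

# Beginning of the query in the URL, the rest in the POST body
echo '1' | curl 'http://localhost:8123/?query=SELECT' --data-binary @-
```

All three variants should print `1` when the server is reachable.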
diff --git a/docs/zh/operations/requirements.md b/docs/zh/operations/requirements.md
index c3013f738a2..964d7aa34f4 100644
--- a/docs/zh/operations/requirements.md
+++ b/docs/zh/operations/requirements.md
@@ -1,59 +1,59 @@
---
toc_priority: 44
-toc_title: "要求"
+toc_title: "必备条件"
---
-# 要求 {#requirements}
+# 必备条件 {#requirements}
## CPU {#cpu}
-对于从预构建的deb包进行安装,请使用具有x86_64架构并支持SSE4.2指令的CPU。 要使用不支持SSE4.2或具有AArch64或PowerPC64LE体系结构的处理器运行ClickHouse,您应该从源代码构建ClickHouse。
+如果您使用预编译的DEB/RPM包安装ClickHouse,请使用支持SSE4.2指令集的x86_64架构的CPU。如果需要在不支持SSE4.2指令集的CPU上,或者在AArch64(ARM)和PowerPC64LE(IBM Power)架构上运行ClickHouse,您应该从源码编译ClickHouse。
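As a quick way to verify the SSE 4.2 requirement before installing the prebuilt packages, a one-line check against `/proc/cpuinfo` (Linux only) can be used:

``` bash
grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
```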
-ClickHouse实现并行数据处理并使用所有可用的硬件资源。 在选择处理器时,考虑到ClickHouse在具有大量内核但时钟速率较低的配置中的工作效率要高于具有较少内核和较高时钟速率的配置。 例如,具有2600MHz的16核心优于具有3600MHz的8核心。
+ClickHouse实现了并行数据处理,处理时会使用所有的可用资源。在选择处理器时,请注意:ClickHouse在具有大量计算核、时钟频率稍低的平台上比计算核少、时钟频率高的平台上效率更高。例如,ClickHouse在16核 2.6GHz的CPU上运行速度高于8核 3.6GHz的CPU。
-建议使用 **睿频加速** 和 **超线程** 技术。 它显着提高了典型工作负载的性能。
+建议使用 **睿频加速** 和 **超线程** 技术。 它显着提高了正常工作负载的性能。
## RAM {#ram}
-我们建议使用至少4GB的RAM来执行重要的查询。 ClickHouse服务器可以使用少得多的RAM运行,但它需要处理查询的内存。
+我们建议使用至少4GB的内存来执行重要的查询。 ClickHouse服务器可以使用很少的内存运行,但它需要一定量的内存用于处理查询。
-RAM所需的体积取决于:
+ClickHouse所需内存取决于:
-- 查询的复杂性。
-- 查询中处理的数据量。
+- 查询的复杂程度。
+- 查询处理的数据量。
-要计算所需的RAM体积,您应该估计临时数据的大小 [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) 和您使用的其他操作。
+要计算所需的内存大小,您应该考虑用于[GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause)、[DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct)、[JOIN](../sql-reference/statements/select/join.md#select-join) 和其他操作所需的临时数据量。
-ClickHouse可以使用外部存储器来存储临时数据。看 [在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) 有关详细信息。
+ClickHouse可以使用外部存储器来存储临时数据。详情请见[在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory)。
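The following sketch shows how spilling GROUP BY state to disk could be enabled for a single query from the command line. The table `hits`, the column `URL`, and the byte thresholds are illustrative placeholders; `max_bytes_before_external_group_by` and `max_memory_usage` are the relevant settings.

``` bash
# Allow the query up to ~20 GB of RAM and start spilling GROUP BY state
# to disk once it has used ~10 GB (hypothetical table and column names)
clickhouse-client \
    --max_memory_usage=20000000000 \
    --max_bytes_before_external_group_by=10000000000 \
    --query "SELECT URL, count() FROM hits GROUP BY URL FORMAT Null"
```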
## 交换文件 {#swap-file}
-禁用生产环境的交换文件。
+请在生产环境禁用交换文件。
## 存储子系统 {#storage-subsystem}
您需要有2GB的可用磁盘空间来安装ClickHouse。
-数据所需的存储量应单独计算。 评估应包括:
+数据所需的存储空间应单独计算。预估存储容量时请考虑:
-- 估计数据量。
+- 数据量
- 您可以采取数据的样本并从中获取行的平均大小。 然后将该值乘以计划存储的行数。
+ 您可以对数据进行采样并计算每行的平均占用空间。然后将该值乘以计划存储的行数。
-- 数据压缩系数。
+- 数据压缩比
- 要估计数据压缩系数,请将数据的样本加载到ClickHouse中,并将数据的实际大小与存储的表的大小进行比较。 例如,点击流数据通常被压缩6-10倍。
+ 要计算数据压缩比,请将样本数据写入ClickHouse,并将原始数据大小与ClickHouse实际存储的数据进行比较。例如,用户点击行为的原始数据压缩比通常为6-10。
-要计算要存储的最终数据量,请将压缩系数应用于估计的数据量。 如果计划将数据存储在多个副本中,则将估计的量乘以副本数。
+请将原始数据的大小除以压缩比来获得实际所需存储的大小。如果您打算将数据存放于几个副本中,请将存储容量乘上副本数。
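A quick worked example of this estimate, with made-up numbers: 1 TiB of raw data, a measured compression ratio of 8, and 3 replicas.

``` bash
raw_bytes=1099511627776   # 1 TiB of raw data (example figure)
compression_ratio=8       # measured on a loaded sample
replicas=3

# Compressed size per replica times replica count: ~128 GiB * 3 = ~384 GiB
echo $(( raw_bytes / compression_ratio * replicas ))
```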
## 网络 {#network}
-如果可能的话,使用10G或更高级别的网络。
+如果可能的话,请使用10G或更高级别的网络。
-网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。 此外,网络速度会影响复制过程。
+网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。此外,网络速度会影响复制过程。
## 软件 {#software}
-ClickHouse主要是为Linux系列操作系统开发的。 推荐的Linux发行版是Ubuntu。 `tzdata` 软件包应安装在系统中。
+ClickHouse主要是为Linux系列操作系统开发的。推荐的Linux发行版是Ubuntu。在安装ClickHouse之前,请确认系统中已经安装了`tzdata`软件包(对于Ubuntu)。
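On Ubuntu/Debian hosts, the `tzdata` prerequisite can be checked (and installed if missing) with dpkg and apt; this sketch applies to Debian-based systems only.

``` bash
dpkg -s tzdata &> /dev/null && echo "tzdata already installed" || sudo apt-get install -y tzdata
```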
-ClickHouse也可以在其他操作系统系列中工作。 查看详细信息 [开始](../getting-started/index.md) 文档的部分。
+ClickHouse也可以在其他操作系统系列中工作。详情请查看[开始](../getting-started/index.md)。
diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md
index 33583f3b730..64f9ed27393 100644
--- a/docs/zh/operations/system-tables/query_thread_log.md
+++ b/docs/zh/operations/system-tables/query_thread_log.md
@@ -1,67 +1,62 @@
----
-machine_translated: true
-machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
----
-
# 系统。query_thread_log {#system_tables-query_thread_log}
包含有关执行查询的线程的信息,例如,线程名称、线程开始时间、查询处理的持续时间。
-开始记录:
+开启日志功能:
-1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 科。
-2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 到1。
+1. 配置服务器参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 部分。
+2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 为1。
-数据的冲洗周期设置在 `flush_interval_milliseconds` 的参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 要强制冲洗,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。
+数据从缓存写入数据表的周期由 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分中的 `flush_interval_milliseconds` 参数控制。如果需要强制将缓存数据写入数据表,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。
-ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/system-tables/index.md#system-tables-introduction) 欲了解更多详情。
+ClickHouse不会自动从表中删除数据。 欲了解更多详情,请参照 [介绍](../../operations/system-tables/index.md#system-tables-introduction)。
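Assuming `log_query_threads` is enabled and the `query_thread_log` server section is configured as described above, a minimal way to inspect the table is to force a flush and select a few recent rows; the columns used here are among those documented below.

``` bash
clickhouse-client --query "SYSTEM FLUSH LOGS"
clickhouse-client --query "
    SELECT event_time, thread_name, thread_id, query_duration_ms
    FROM system.query_thread_log
    ORDER BY event_time DESC
    LIMIT 5
"
```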
列:
-- `event_date` ([日期](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
-- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
-- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
-- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
-- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
-- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。
-- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
-- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
-- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the thread.
-- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
-- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID.
-- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread.
-- `query` ([字符串](../../sql-reference/data-types/string.md)) — Query string.
-- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- - 1 — Query was initiated by the client.
- - 0 — Query was initiated by another query for distributed query execution.
-- `user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
-- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the query.
-- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
-- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
-- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
-- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
-- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
-- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
-- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
+- `event_date` ([日期](../../sql-reference/data-types/date.md)) — 该查询线程执行完成的日期。
+- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 该查询线程执行完成的时间。
+- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 查询的开始时间。
+- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询执行持续的时间。
+- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的行数。
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的字节数。
+- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的行数。 对于其他查询,为0。
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的字节数。 对于其他查询,为0。
+- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在该线程上下文中,已分配内存与已释放内存之差。
+- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在该线程上下文中,已分配内存与已释放内存之差的最大值。
+- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — 线程名。
+- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 内部线程ID。
+- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — 线程ID。
+- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 初始线程的初始OS线程ID。
+- `query` ([字符串](../../sql-reference/data-types/string.md)) — 查询语句。
+- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询类型,可能的值:
+ - 1 — 由客户端发起的查询。
+ - 0 — 由其他查询发起的分布式查询。
+- `user` ([字符串](../../sql-reference/data-types/string.md)) — 发起查询的用户名。
+- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — 查询的ID。
+- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起查询的IP地址。
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的端口。
+- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的用户名(对于分布式查询)。
+- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的ID(对于分布式查询)。
+- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起该查询的父查询IP地址。
+- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起该查询的父查询端口。
+- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的接口,可能的值:
- 1 — TCP.
- 2 — HTTP.
-- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md).
-- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或者运行另一个TCP客户端。
-- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — The [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端名称。
-- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。
-- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。
-- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。
-- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端版本。
-- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- - 0 — The query was launched from the TCP interface.
+- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — 使用 [clickhouse-client](../../interfaces/cli.md) 的系统用户名。
+- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — 运行 [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主机名。
+- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的名称。
+- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的修订号。
+- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主版本号。
+- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的次版本号。
+- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的补丁版本号。
+- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的HTTP方法,可能的值:
+ - 0 — 查询通过TCP接口发起。
- 1 — `GET` 方法被使用。
- 2 — `POST` 方法被使用。
-- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — The `UserAgent` http请求中传递的标头。
-- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — The “quota key” 在指定 [配额](../../operations/quotas.md) 设置(见 `keyed`).
-- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
-- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events).
+- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — HTTP请求中传递的 `UserAgent` 标头。
+- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — 在[配额](../../operations/quotas.md)设置中指定的 “quota key”(详见 `keyed`)。
+- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse 修订版本号。
+- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — 该线程的各项指标计数器。具体说明可参考 [system.events](#system_tables-events)。
**示例**
@@ -113,4 +108,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr
**另请参阅**
-- [系统。query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` 系统表,其中包含有关查询执行的公共信息。
+- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — `query_log` 系统表描述,其中包含有关查询执行的公共信息。
+- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — 这个表包含在查询线程中使用的各个视图的信息。
diff --git a/docs/zh/sql-reference/ansi.md b/docs/zh/sql-reference/ansi.md
index 0e7fa1d06c3..5aad2cf52a8 100644
--- a/docs/zh/sql-reference/ansi.md
+++ b/docs/zh/sql-reference/ansi.md
@@ -1,180 +1,189 @@
---
-machine_translated: true
-machine_translated_rev: ad252bbb4f7e2899c448eb42ecc39ff195c8faa1
toc_priority: 40
toc_title: "ANSI\u517C\u5BB9\u6027"
---
-# Ansi Sql兼容性的ClickHouse SQL方言 {#ansi-sql-compatibility-of-clickhouse-sql-dialect}
+# ClickHouse SQL方言与ANSI SQL的兼容性 {#ansi-sql-compatibility-of-clickhouse-sql-dialect}
!!! note "注"
- 本文依赖于表38, “Feature taxonomy and definition for mandatory features”, Annex F of ISO/IEC CD 9075-2:2013.
+ 本文以 [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8) 标准的 Annex G 为参考。
## 行为差异 {#differences-in-behaviour}
-下表列出了查询功能在ClickHouse中有效但不符合ANSI SQL标准的情况。
+下表列出了ClickHouse能够使用,但与ANSI SQL规定有差异的查询特性。
-| Feature ID | 功能名称 | 差异 |
-|------------|--------------------|---------------------------------------------------------------------|
-| E011 | 数值(Numeric)数据类型 | 带小数点的数值文字被解释为近似值 (`Float64`)而不是精确值 (`Decimal`) |
-| E051-05 | SELECT字段可以重命名 | 字段不仅仅在SELECT结果中可被重命名 |
-| E141-01 | 非空约束 | 表中每一列默认为`NOT NULL` |
-| E011-04 | 算术运算符 | ClickHouse不会检查算法,并根据自定义规则更改结果数据类型,而是会溢出 |
+| 功能ID | 功能名称 | 差异 |
+| ------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| E011 | 数值型数据类型 | 带小数点的数字被视为近似值 (`Float64`)而不是精确值 (`Decimal`) |
+| E051-05 | SELECT 的列可以重命名 | 字段重命名的作用范围不限于进行重命名的SELECT子查询(参考[表达式别名](https://clickhouse.com/docs/zh/sql-reference/syntax/#notes-on-usage)) |
+| E141-01 | NOT NULL(非空)约束 | ClickHouse表中每一列默认为`NOT NULL` |
+| E011-04 | 算术运算符 | ClickHouse在运算时会进行溢出,而不是四舍五入。此外会根据自定义规则修改结果数据类型(参考[溢出检查](https://clickhouse.com/docs/zh/sql-reference/data-types/decimal/#yi-chu-jian-cha)) |
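To illustrate the E011 row above (decimal literals being parsed as approximate `Float64` rather than exact `Decimal`), a quick check with `toTypeName`, assuming a running local server:

``` bash
clickhouse-client --query "SELECT toTypeName(1.5), toTypeName(toDecimal64(1.5, 2))"
# Expected output: Float64	Decimal(18, 2)
```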
-## 功能匹配 {#feature-status}
+## 功能状态 {#feature-status}
-| Feature ID | 功能名称 | 匹配 | 评论 |
-|------------|----------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| **E011** | **数字数据类型** | **部分**{.text-warning} | |
-| E011-01 | 整型和小型数据类型 | 是 {.text-success} | |
-| E011-02 | 真实、双精度和浮点数据类型数据类型 | 部分 {.text-warning} | `FLOAT()`, `REAL` 和 `DOUBLE PRECISION` 不支持 |
-| E011-03 | 十进制和数值数据类型 | 部分 {.text-warning} | 只有 `DECIMAL(p,s)` 支持,而不是 `NUMERIC` |
-| E011-04 | 算术运算符 | 是 {.text-success} | |
-| E011-05 | 数字比较 | 是 {.text-success} | |
-| E011-06 | 数字数据类型之间的隐式转换 | 否。 {.text-danger} | ANSI SQL允许在数值类型之间进行任意隐式转换,而ClickHouse依赖于具有多个重载的函数而不是隐式转换 |
-| **E021** | **字符串类型** | **部分**{.text-warning} | |
-| E021-01 | 字符数据类型 | 否。 {.text-danger} | |
-| E021-02 | 字符变化数据类型 | 否。 {.text-danger} | `String` 行为类似,但括号中没有长度限制 |
-| E021-03 | 字符文字 | 部分 {.text-warning} | 不自动连接连续文字和字符集支持 |
-| E021-04 | 字符长度函数 | 部分 {.text-warning} | 非也。 `USING` 条款 |
-| E021-05 | OCTET_LENGTH函数 | 非也。 {.text-danger} | `LENGTH` 表现类似 |
-| E021-06 | SUBSTRING | 部分 {.text-warning} | 不支持 `SIMILAR` 和 `ESCAPE` 条款,否 `SUBSTRING_REGEX` 备选案文 |
-| E021-07 | 字符串联 | 部分 {.text-warning} | 非也。 `COLLATE` 条款 |
-| E021-08 | 上下功能 | 是 {.text-success} | |
-| E021-09 | 修剪功能 | 是 {.text-success} | |
-| E021-10 | 固定长度和可变长度字符串类型之间的隐式转换 | 否。 {.text-danger} | ANSI SQL允许在字符串类型之间进行任意隐式转换,而ClickHouse依赖于具有多个重载的函数而不是隐式转换 |
-| E021-11 | 职位功能 | 部分 {.text-warning} | 不支持 `IN` 和 `USING` 条款,否 `POSITION_REGEX` 备选案文 |
-| E021-12 | 字符比较 | 是 {.text-success} | |
-| **E031** | **标识符** | **部分**{.text-warning} | |
-| E031-01 | 分隔标识符 | 部分 {.text-warning} | Unicode文字支持有限 |
-| E031-02 | 小写标识符 | 是 {.text-success} | |
-| E031-03 | 尾部下划线 | 是 {.text-success} | |
-| **E051** | **基本查询规范** | **部分**{.text-warning} | |
-| E051-01 | SELECT DISTINCT | 是 {.text-success} | |
-| E051-02 | GROUP BY子句 | 是 {.text-success} | |
-| E051-04 | 分组依据可以包含不在列 `