Merge branch 'master' into ephemeral-column

This commit is contained in:
Nikolai Kochetov 2022-02-15 10:03:34 +00:00
commit ab288642f6
512 changed files with 11275 additions and 3405 deletions

View File

@ -12,6 +12,7 @@ BraceWrapping:
AfterUnion: true
BeforeCatch: true
BeforeElse: true
BeforeLambdaBody: true
IndentBraces: false
BreakConstructorInitializersBeforeComma: false
Cpp11BracedListStyle: true

View File

@ -142,6 +142,7 @@ Checks: '-*,
clang-analyzer-cplusplus.PlacementNewChecker,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-cplusplus.Move,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,

73
.github/workflows/nightly.yml vendored Normal file
View File

@ -0,0 +1,73 @@
name: NightlyBuilds
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
"on":
schedule:
- cron: '13 3 * * *'
jobs:
DockerHubPushAarch64:
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix aarch64 --all
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
DockerHubPushAmd64:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix amd64 --all
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed aarch64 images
uses: actions/download-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}
- name: Download changed amd64 images
uses: actions/download-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json

3
.gitmodules vendored
View File

@ -259,3 +259,6 @@
[submodule "contrib/azure"]
path = contrib/azure
url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng

View File

@ -1,27 +0,0 @@
# This is the configuration file with settings for Potato.
# Potato is an internal Yandex technology that allows us to sync internal [Yandex.Tracker](https://yandex.com/tracker/) and GitHub.
# For all PRs where documentation is needed, just add a 'pr-feature' label and we will include it into documentation sprints.
# The project name.
name: clickhouse
# Object handlers defines which handlers we use.
handlers:
# The handler for creating an Yandex.Tracker issue.
- name: issue-create
params:
triggers:
# The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
github:pullRequest:labeled:
data:
# The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
queue: CLICKHOUSEDOCS
# The issue title.
summary: '[Potato] Pull Request #{{pullRequest.number}}'
# The issue description.
description: >
{{pullRequest.description}}
Ссылка на Pull Request: {{pullRequest.webUrl}}
# The condition for creating the Yandex.Tracker issue.
condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length

View File

@ -182,7 +182,7 @@ if (COMPILER_CLANG)
if (HAS_USE_CTOR_HOMING)
# For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
endif()

View File

@ -109,10 +109,10 @@ void LineReader::Suggest::addWords(Words && new_words)
std::lock_guard lock(mutex);
addNewWords(words, new_words, std::less<std::string>{});
addNewWords(words_no_case, new_words_no_case, NoCaseCompare{});
}
assert(std::is_sorted(words.begin(), words.end()));
assert(std::is_sorted(words_no_case.begin(), words_no_case.end(), NoCaseCompare{}));
assert(std::is_sorted(words.begin(), words.end()));
assert(std::is_sorted(words_no_case.begin(), words_no_case.end(), NoCaseCompare{}));
}
}
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)

View File

@ -2,6 +2,63 @@
#include <pdqsort.h>
#ifndef NDEBUG
#include <pcg_random.hpp>
#include <base/getThreadId.h>
/** Same as libcxx std::__debug_less. Just without dependency on private part of standard library.
* Check that Comparator induces strict weak ordering.
*/
template <typename Comparator>
class DebugLessComparator
{
public:
constexpr DebugLessComparator(Comparator & cmp_)
: cmp(cmp_)
{}
template <typename LhsType, typename RhsType>
constexpr bool operator()(const LhsType & lhs, const RhsType & rhs)
{
bool lhs_less_than_rhs = cmp(lhs, rhs);
if (lhs_less_than_rhs)
assert(!cmp(rhs, lhs));
return lhs_less_than_rhs;
}
template <typename LhsType, typename RhsType>
constexpr bool operator()(LhsType & lhs, RhsType & rhs)
{
bool lhs_less_than_rhs = cmp(lhs, rhs);
if (lhs_less_than_rhs)
assert(!cmp(rhs, lhs));
return lhs_less_than_rhs;
}
private:
Comparator & cmp;
};
template <typename Comparator>
using ComparatorWrapper = DebugLessComparator<Comparator>;
template <typename RandomIt>
void shuffle(RandomIt first, RandomIt last)
{
static thread_local pcg64 rng(getThreadId());
std::shuffle(first, last, rng);
}
#else
template <typename Comparator>
using ComparatorWrapper = Comparator;
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
@ -10,19 +67,48 @@
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
::miniselect::floyd_rivest_select(first, nth, last);
}
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
template <typename RandomIt>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
{
::miniselect::floyd_rivest_partial_sort(first, middle, last);
comparator compare;
ComparatorWrapper<comparator> compare_wrapper = compare;
#ifndef NDEBUG
::shuffle(first, last);
#endif
::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper);
#ifndef NDEBUG
::shuffle(first, nth);
if (nth != last)
::shuffle(nth + 1, last);
#endif
}
template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{
::miniselect::floyd_rivest_partial_sort(first, middle, last, compare);
#ifndef NDEBUG
::shuffle(first, last);
#endif
ComparatorWrapper<Compare> compare_wrapper = compare;
::miniselect::floyd_rivest_partial_sort(first, middle, last, compare_wrapper);
#ifndef NDEBUG
::shuffle(middle, last);
#endif
}
template <typename RandomIt>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
::partial_sort(first, middle, last, comparator());
}
#pragma GCC diagnostic pop
@ -30,7 +116,12 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compar
template <typename RandomIt, typename Compare>
void sort(RandomIt first, RandomIt last, Compare compare)
{
::pdqsort(first, last, compare);
#ifndef NDEBUG
::shuffle(first, last);
#endif
ComparatorWrapper<Compare> compare_wrapper = compare;
::pdqsort(first, last, compare_wrapper);
}
template <typename RandomIt>
@ -38,5 +129,5 @@ void sort(RandomIt first, RandomIt last)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
::pdqsort(first, last, comparator());
::sort(first, last, comparator());
}

View File

@ -79,18 +79,14 @@ static void call_default_signal_handler(int sig)
raise(sig);
}
static constexpr size_t max_query_id_size = 127;
static const size_t signal_pipe_buf_size =
sizeof(int)
+ sizeof(siginfo_t)
+ sizeof(ucontext_t)
+ sizeof(ucontext_t*)
+ sizeof(StackTrace)
+ sizeof(UInt32)
+ max_query_id_size + 1 /// query_id + varint encoded length
+ sizeof(void*);
using signal_function = void(int, siginfo_t*, void*);
static void writeSignalIDtoSignalPipe(int sig)
@ -129,18 +125,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
const ucontext_t signal_context = *reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(signal_context);
StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe.
query_id.size = std::min(query_id.size, max_query_id_size);
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(*signal_context);
DB::writeBinary(sig, out);
DB::writePODBinary(*info, out);
DB::writePODBinary(signal_context, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
@ -184,6 +176,8 @@ public:
void run() override
{
static_assert(PIPE_BUF >= 512);
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
char buf[signal_pipe_buf_size];
DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
@ -227,10 +221,9 @@ public:
else
{
siginfo_t info{};
ucontext_t context{};
ucontext_t * context{};
StackTrace stack_trace(NoCapture{});
UInt32 thread_num{};
std::string query_id;
DB::ThreadStatus * thread_ptr{};
if (sig != SanitizerTrap)
@ -241,12 +234,11 @@ public:
DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in);
DB::readBinary(query_id, in);
DB::readPODBinary(thread_ptr, in);
/// This allows to receive more signals if failure happens inside onFault function.
/// Example: segfault while symbolizing stack trace.
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, thread_ptr); }).detach();
}
}
}
@ -279,18 +271,27 @@ private:
void onFault(
int sig,
const siginfo_t & info,
const ucontext_t & context,
ucontext_t * context,
const StackTrace & stack_trace,
UInt32 thread_num,
const std::string & query_id,
DB::ThreadStatus * thread_ptr) const
{
DB::ThreadStatus thread_status;
String query_id;
String query;
/// Send logs from this thread to client if possible.
/// It will allow client to see failure messages directly.
if (thread_ptr)
{
query_id = thread_ptr->getQueryId().toString();
if (auto thread_group = thread_ptr->getThreadGroup())
{
query = thread_group->query;
}
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
@ -305,15 +306,15 @@ private:
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, query_id, strsignal(sig), sig);
thread_num, query_id, query, strsignal(sig), sig);
}
String error_message;
if (sig != SanitizerTrap)
error_message = signalToErrorMessage(sig, info, context);
error_message = signalToErrorMessage(sig, info, *context);
else
error_message = "Sanitizer trap.";
@ -389,20 +390,16 @@ static void sanitizerDeathCallback()
const StackTrace stack_trace;
StringRef query_id = DB::CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, max_query_id_size);
int sig = SignalListener::SanitizerTrap;
DB::writeBinary(sig, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
/// The time that is usually enough for separate thread to print info into log.
sleepForSeconds(10);
sleepForSeconds(20);
}
#endif

View File

@ -78,6 +78,7 @@ add_contrib (croaring-cmake croaring)
add_contrib (zstd-cmake zstd)
add_contrib (zlib-ng-cmake zlib-ng)
add_contrib (bzip2-cmake bzip2)
add_contrib (minizip-ng-cmake minizip-ng)
add_contrib (snappy-cmake snappy)
add_contrib (rocksdb-cmake rocksdb)
add_contrib (thrift-cmake thrift)

View File

@ -56,19 +56,11 @@ list(APPEND SOURCES ${CASS_SRC_DIR}/atomic/atomic_std.hpp)
add_library(_curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp)
add_library(_hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp)
add_library(_http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c)
add_library(_minizip OBJECT
${CASS_SRC_DIR}/third_party/minizip/ioapi.c
${CASS_SRC_DIR}/third_party/minizip/zip.c
${CASS_SRC_DIR}/third_party/minizip/unzip.c)
target_link_libraries(_minizip ch_contrib::zlib)
target_compile_definitions(_minizip PRIVATE "-Dz_crc_t=unsigned long")
list(APPEND INCLUDE_DIRS
${CASS_SRC_DIR}/third_party/curl
${CASS_SRC_DIR}/third_party/hdr_histogram
${CASS_SRC_DIR}/third_party/http-parser
${CASS_SRC_DIR}/third_party/minizip
${CASS_SRC_DIR}/third_party/mt19937_64
${CASS_SRC_DIR}/third_party/rapidjson/rapidjson
${CASS_SRC_DIR}/third_party/sparsehash/src)
@ -123,10 +115,9 @@ add_library(_cassandra
${SOURCES}
$<TARGET_OBJECTS:_curl_hostcheck>
$<TARGET_OBJECTS:_hdr_histogram>
$<TARGET_OBJECTS:_http-parser>
$<TARGET_OBJECTS:_minizip>)
$<TARGET_OBJECTS:_http-parser>)
target_link_libraries(_cassandra ch_contrib::zlib)
target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip)
target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS})
target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})
target_compile_definitions(_cassandra PRIVATE CASS_BUILDING)

1
contrib/minizip-ng vendored Submodule

@ -0,0 +1 @@
Subproject commit 6cffc951851620e0fac1993be75e4713c334de03

View File

@ -0,0 +1,168 @@
option(ENABLE_MINIZIP "Enable minizip-ng the zip manipulation library" ${ENABLE_LIBRARIES})
if (NOT ENABLE_MINIZIP)
message (STATUS "minizip-ng disabled")
return()
endif()
set(_MINIZIP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/minizip-ng")
# Initial source files
set(MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_crypt.c
${_MINIZIP_SOURCE_DIR}/mz_os.c
${_MINIZIP_SOURCE_DIR}/mz_strm.c
${_MINIZIP_SOURCE_DIR}/mz_strm_buf.c
${_MINIZIP_SOURCE_DIR}/mz_strm_mem.c
${_MINIZIP_SOURCE_DIR}/mz_strm_split.c
${_MINIZIP_SOURCE_DIR}/mz_zip.c
${_MINIZIP_SOURCE_DIR}/mz_zip_rw.c)
# Initial header files
set(MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz.h
${_MINIZIP_SOURCE_DIR}/mz_os.h
${_MINIZIP_SOURCE_DIR}/mz_crypt.h
${_MINIZIP_SOURCE_DIR}/mz_strm.h
${_MINIZIP_SOURCE_DIR}/mz_strm_buf.h
${_MINIZIP_SOURCE_DIR}/mz_strm_mem.h
${_MINIZIP_SOURCE_DIR}/mz_strm_split.h
${_MINIZIP_SOURCE_DIR}/mz_strm_os.h
${_MINIZIP_SOURCE_DIR}/mz_zip.h
${_MINIZIP_SOURCE_DIR}/mz_zip_rw.h)
set(MINIZIP_INC ${_MINIZIP_SOURCE_DIR})
set(MINIZIP_DEF)
set(MINIZIP_PUBLIC_DEF)
set(MINIZIP_LIB)
# Check if zlib is present
set(MZ_ZLIB ON)
if(MZ_ZLIB)
# Use zlib from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::zlib)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.h)
list(APPEND MINIZIP_DEF "-DHAVE_ZLIB")
endif()
# Check if bzip2 is present
set(MZ_BZIP2 ${ENABLE_BZIP2})
if(MZ_BZIP2)
# Use bzip2 from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::bzip2)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.h)
list(APPEND MINIZIP_DEF "-DHAVE_BZIP2")
endif()
# Check if liblzma is present
set(MZ_LZMA ON)
if(MZ_LZMA)
# Use liblzma from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::xz)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.h)
list(APPEND MINIZIP_DEF "-DHAVE_LZMA")
endif()
# Check if zstd is present
set(MZ_ZSTD ON)
if(MZ_ZSTD)
# Use zstd from ClickHouse contrib
list(APPEND MINIZIP_LIB ch_contrib::zstd)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.h)
list(APPEND MINIZIP_DEF "-DHAVE_ZSTD")
endif()
if(NOT MZ_ZLIB AND NOT MZ_ZSTD AND NOT MZ_BZIP2 AND NOT MZ_LZMA)
message(STATUS "Compression not supported due to missing libraries")
list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_DECOMPRESSION)
list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_COMPRESSION)
endif()
# Check to see if openssl installation is present
set(MZ_OPENSSL ${ENABLE_SSL})
if(MZ_OPENSSL)
# Use openssl from ClickHouse contrib
list(APPEND MINIZIP_LIB OpenSSL::SSL OpenSSL::Crypto)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_crypt_openssl.c)
endif()
# Include WinZIP AES encryption
set(MZ_WZAES ${ENABLE_SSL})
if(MZ_WZAES)
list(APPEND MINIZIP_DEF -DHAVE_WZAES)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.h)
endif()
# Include traditional PKWare encryption
set(MZ_PKCRYPT ON)
if(MZ_PKCRYPT)
list(APPEND MINIZIP_DEF -DHAVE_PKCRYPT)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.h)
endif()
# Unix specific
if(UNIX)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_os_posix.c
${_MINIZIP_SOURCE_DIR}/mz_strm_os_posix.c)
endif()
# Include compatibility layer
set(MZ_COMPAT ON)
if(MZ_COMPAT)
list(APPEND MINIZIP_SRC
${_MINIZIP_SOURCE_DIR}/mz_compat.c)
list(APPEND MINIZIP_HDR
${_MINIZIP_SOURCE_DIR}/mz_compat.h
zip.h
unzip.h)
list(APPEND MINIZIP_INC "${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND MINIZIP_PUBLIC_DEF "-DMZ_COMPAT_VERSION=110")
endif()
add_library(_minizip ${MINIZIP_SRC} ${MINIZIP_HDR})
target_include_directories(_minizip PUBLIC ${MINIZIP_INC})
target_compile_definitions(_minizip PUBLIC ${MINIZIP_PUBLIC_DEF})
target_compile_definitions(_minizip PRIVATE ${MINIZIP_DEF})
target_link_libraries(_minizip PRIVATE ${MINIZIP_LIB})
add_library(ch_contrib::minizip ALIAS _minizip)

View File

@ -0,0 +1,13 @@
/* unzip.h -- Compatibility layer shim
part of the minizip-ng project
This program is distributed under the terms of the same license as zlib.
See the accompanying LICENSE file for the full text of the license.
*/
#ifndef MZ_COMPAT_UNZIP
#define MZ_COMPAT_UNZIP
#include "mz_compat.h"
#endif

View File

@ -0,0 +1,13 @@
/* zip.h -- Compatibility layer shim
part of the minizip-ng project
This program is distributed under the terms of the same license as zlib.
See the accompanying LICENSE file for the full text of the license.
*/
#ifndef MZ_COMPAT_ZIP
#define MZ_COMPAT_ZIP
#include "mz_compat.h"
#endif

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df
Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d

View File

@ -127,11 +127,6 @@ endif()
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
if(CMAKE_SYSTEM_PROCESSOR MATCHES arm)
add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE)
# no debug info for IOS, that will make our library big
add_definitions(-DNDEBUG)
endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")

View File

@ -16,6 +16,8 @@ Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

View File

@ -1,15 +0,0 @@
version: "2"
services:
builder:
image: clickhouse/clickhouse-builder
build: docker/builder
client:
image: clickhouse/clickhouse-client
build: docker/client
command: ['--host', 'server']
server:
image: clickhouse/clickhouse-server
build: docker/server
ports:
- 8123:8123

View File

@ -32,6 +32,7 @@
"dependent": []
},
"docker/test/pvs": {
"only_amd64": true,
"name": "clickhouse/pvs-test",
"dependent": []
},
@ -72,6 +73,7 @@
"dependent": []
},
"docker/test/integration/runner": {
"only_amd64": true,
"name": "clickhouse/integration-tests-runner",
"dependent": []
},
@ -124,6 +126,7 @@
"dependent": []
},
"docker/test/integration/kerberos_kdc": {
"only_amd64": true,
"name": "clickhouse/kerberos-kdc",
"dependent": []
},
@ -137,6 +140,7 @@
]
},
"docker/test/integration/kerberized_hadoop": {
"only_amd64": true,
"name": "clickhouse/kerberized-hadoop",
"dependent": []
},

View File

@ -185,15 +185,14 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
info locals
thread apply all backtrace full
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
@ -314,6 +313,11 @@ quit
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi
if test -f core.*; then
pigz core.*
mv core.*.gz core.gz
fi
}
case "$stage" in
@ -345,6 +349,10 @@ case "$stage" in
time fuzz
;&
"report")
CORE_LINK=''
if [ -f core.gz ]; then
CORE_LINK='<a href="core.gz">core.gz</a>'
fi
cat > report.html <<EOF ||:
<!DOCTYPE html>
<html lang="en">
@ -386,6 +394,7 @@ th { cursor: pointer; }
<a href="fuzzer.log">fuzzer.log</a>
<a href="server.log">server.log</a>
<a href="main.log">main.log</a>
${CORE_LINK}
</p>
<table>
<tr><th>Test name</th><th>Test status</th><th>Description</th></tr>

View File

@ -15,9 +15,10 @@ RUN curl -o krb5-libs-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.
rm -fr *.rpm
RUN cd /tmp && \
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop/sbin
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop-2.7.0/sbin && \
[ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop

View File

@ -58,9 +58,7 @@ RUN apt-get update \
RUN dockerd --version; docker --version
ARG TARGETARCH
# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
RUN python3 -m pip install \
PyMySQL \
aerospike==4.0.0 \
avro==1.10.2 \
@ -90,7 +88,7 @@ RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
urllib3 \
requests-kerberos \
pyhdfs \
azure-storage-blob )
azure-storage-blob
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/

View File

@ -4,7 +4,7 @@ services:
kerberizedhdfs1:
cap_add:
- DAC_READ_SEARCH
image: clickhouse/kerberized-hadoop
image: clickhouse/kerberized-hadoop:${DOCKER_KERBERIZED_HADOOP_TAG:-latest}
hostname: kerberizedhdfs1
restart: always
volumes:

View File

@ -45,6 +45,7 @@ export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"

View File

@ -1,5 +1,5 @@
# docker build -t clickhouse/performance-comparison .
FROM ubuntu:18.04
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"

View File

@ -4,11 +4,7 @@
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere
# We'll produce an empty image for arm64
ARG TARGETARCH
RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
RUN apt-get update --yes \
&& apt-get install \
bash \
wget \
@ -21,7 +17,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
libprotoc-dev \
libgrpc++-dev \
libc-ares-dev \
--yes --no-install-recommends )
--yes --no-install-recommends
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
#RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list
@ -33,7 +29,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"
RUN test x$TARGETARCH = xarm64 || ( set -x \
RUN set -x \
&& export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
@ -41,7 +37,7 @@ RUN test x$TARGETARCH = xarm64 || ( set -x \
&& wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \
&& { debsig-verify ${PKG_VERSION}.deb \
|| echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \
&& dpkg -i "${PKG_VERSION}.deb" )
&& dpkg -i "${PKG_VERSION}.deb"
ENV CCACHE_DIR=/test_output/ccache

View File

@ -148,14 +148,12 @@ info signals
continue
gcore
backtrace full
info locals
thread apply all backtrace full
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
@ -269,5 +267,5 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes
# Default filename is 'core.PROCESS_ID'
for core in core.*; do
pigz $core
mv $core.gz /output/
mv $core.gz /test_output/
done

View File

@ -43,24 +43,27 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 20.10.6
RUN set -eux; \
\
# this "case" statement is generated via "update.sh"
\
if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \
echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \
exit 1; \
fi; \
\
tar --extract \
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Install docker
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& set -eux \
&& if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \
echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \
&& exit 1; \
fi \
&& tar --extract \
--file docker.tgz \
--strip-components 1 \
--directory /usr/local/bin/ \
; \
rm docker.tgz; \
\
dockerd --version; \
docker --version
&& rm docker.tgz \
&& dockerd --version \
&& docker --version
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/

View File

@ -886,3 +886,12 @@ S3 disk can be configured as `main` or `cold` storage:
```
In case of the `cold` option, data can be moved to S3 if the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
## Virtual Columns {#virtual-columns}
- `_part` — Name of a part.
- `_part_index` — Sequential index of the part in the query result.
- `_partition_id` — Name of a partition.
- `_part_uuid` — Unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
- `_partition_value` — Values (a tuple) of a `partition by` expression.
- `_sample_factor` — Sample factor (from the query).
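For illustration only, a query over these virtual columns might look like the following sketch (the table `visits` is a hypothetical MergeTree table, not part of this change):

```sql
-- Hypothetical MergeTree table; virtual columns are not declared in the table
-- definition but can be selected explicitly.
SELECT
    _part,
    _partition_id,
    count() AS part_rows
FROM visits
GROUP BY _part, _partition_id
ORDER BY _part;
```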

View File

@ -209,6 +209,8 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
## Virtual Columns {#virtual-columns}
- `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md).
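As a sketch, the shard each row came from could be inspected like this (the `Distributed` table `dist_hits` is hypothetical):

```sql
-- dist_hits is a hypothetical Distributed table; _shard_num is supplied by the engine.
SELECT
    _shard_num,
    count() AS shard_rows
FROM dist_hits
GROUP BY _shard_num
ORDER BY _shard_num;
```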

View File

@ -7,18 +7,29 @@ toc_title: URL
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
Syntax: `URL(URL, Format)`
Syntax: `URL(URL [,Format] [,CompressionMethod])`
- The `URL` parameter must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server that uses HTTP or HTTPS. This does not require any additional headers for getting a response from the server.
- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.
To enable compression, please first make sure the remote HTTP endpoint indicated by the `URL` parameter supports the corresponding compression algorithm.
The supported `CompressionMethod` should be one of the following:
- gzip or gz
- deflate
- brotli or br
- lzma or xz
- zstd or zst
- lz4
- bz2
- snappy
- none
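A minimal sketch of the extended syntax, assuming a hypothetical endpoint that accepts gzip-compressed bodies (the exact spelling of the compression argument follows the list above):

```sql
-- The third argument selects the compression method from the list above.
CREATE TABLE url_gzip_table (word String, value UInt64)
ENGINE = URL('http://127.0.0.1:12345/', CSV, 'gzip');
```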
## Usage {#using-the-engine-in-the-clickhouse-server}
The `format` must be one that ClickHouse can use in
`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
[Formats](../../../interfaces/formats.md#formats).
The `URL` must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server
that uses HTTP or HTTPS. This does not require any
additional headers for getting a response from the server.
`INSERT` and `SELECT` queries are transformed to `POST` and `GET` requests,
respectively. For processing `POST` requests, the remote server must support
[Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding).

View File

@ -67,6 +67,7 @@ toc_title: Adopters
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
| <a href="https://gigasheet.co/" class="favicon">Gigasheet</a> | Analytics | Main product | — | — | Direct Reference, February 2022|
| <a href="https://glaber.io/" class="favicon">Glaber</a> | Monitoring | Main product | — | — | [Website](https://glaber.io/) |
| <a href="https://graphcdn.io/" class="favicon">GraphCDN</a> | CDN | Traffic Analytics | — | — | [Blog Post in English, August 2021](https://altinity.com/blog/delivering-insight-on-graphql-apis-with-clickhouse-at-graphcdn/) |
| <a href="https://www.grouparoo.com" class="favicon">Grouparoo</a> | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) |

View File

@ -108,7 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati
ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `<keeper_server>` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with:
```bash
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
clickhouse-keeper --config /etc/your_path_to_config/config.xml
```
If you don't have the symlink (`clickhouse-keeper`), you can create it or specify `keeper` as an argument:
```bash
clickhouse keeper --config /etc/your_path_to_config/config.xml
```
## Four Letter Word Commands {#four-letter-word-commands}

View File

@ -27,7 +27,7 @@ To analyze the `trace_log` system table:
For security reasons, introspection functions are disabled by default.
- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).
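For illustration, a typical aggregation over `system.trace_log` might look like the sketch below (it assumes `allow_introspection_functions` is enabled and that the server has collected some samples):

```sql
SET allow_introspection_functions = 1;
-- Group identical stack traces and symbolize them with the introspection functions.
SELECT
    count() AS samples,
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
FROM system.trace_log
GROUP BY trace
ORDER BY samples DESC
LIMIT 10;
```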

View File

@ -1803,6 +1803,20 @@ If an INSERTed block is skipped due to deduplication in the source table, there
At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and an `INSERT` into a materialized view failed (e.g. because of a communication failure with Zookeeper), a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself,
ignoring the check result for the source table, and will insert rows lost because of the first failure.
## insert_deduplication_token {#insert_deduplication_token}
The setting allows a user to provide their own deduplication semantics in MergeTree/ReplicatedMergeTree.
For example, by providing a unique value for the setting in each INSERT statement,
a user can avoid the same inserted data being deduplicated.
Possible values:
- Any string
Default value: empty string (disabled)
`insert_deduplication_token` is used for deduplication _only_ when it is not empty.
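A minimal sketch of how the token could be supplied per INSERT (the table name and token values are hypothetical, and deduplication is assumed to be enabled for the table):

```sql
-- Repeating the same token lets the server drop a retried INSERT;
-- a different token makes the same data be inserted again.
INSERT INTO test_table SETTINGS insert_deduplication_token = 'token-1' VALUES (1);
INSERT INTO test_table SETTINGS insert_deduplication_token = 'token-1' VALUES (1); -- deduplicated
INSERT INTO test_table SETTINGS insert_deduplication_token = 'token-2' VALUES (1); -- inserted again
```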
## max_network_bytes {#settings-max-network-bytes}
Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
@ -2304,7 +2318,7 @@ Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: `0`.
Default value: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
@ -2315,7 +2329,7 @@ Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: `0`.
Default value: `1`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

View File

@ -2,7 +2,7 @@
Contains stack traces of all server threads. Allows developers to introspect the server state.
To analyze stack frames, use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md).
To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md).
Columns:

View File

@ -4,7 +4,7 @@ Contains stack traces collected by the sampling query profiler.
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions.
Columns:

View File

@ -1,9 +1,9 @@
---
toc_priority: 40
toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---
# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256}
# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
Fixed-length integers, with or without a sign.

View File

@ -120,7 +120,7 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32
Get the first available contact method for the customer from the contact list:
``` sql
SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
```
``` text

View File

@ -113,6 +113,111 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
```
## addressToLineWithInlines {#addresstolinewithinlines}
Similar to `addressToLine`, but it returns an Array with all inline functions; as a result, it is much slower.
If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
**Syntax**
``` sql
addressToLineWithInlines(address_of_binary_instruction)
```
**Arguments**
- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
**Returned value**
- An array whose first element is the source code filename and the line number in this file, delimited by a colon. Starting from the second element, the source code filename, line number and function name of each inline function are listed.
- An array with a single element, which is the name of a binary, if the function couldn't find the debug information.
- An empty array, if the address is not valid.
Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Enabling introspection functions:
``` sql
SET allow_introspection_functions=1;
```
Applying the function to an address.
```sql
SELECT addressToLineWithInlines(531055181::UInt64);
```
``` text
┌─addressToLineWithInlines(CAST('531055181', 'UInt64'))────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Applying the function to the whole stack trace:
``` sql
SELECT
ta, addressToLineWithInlines(arrayJoin(trace) as ta)
FROM system.trace_log
WHERE
query_id = '5e173544-2020-45de-b645-5deebe2aae54';
```
The [arrayJoin](../../sql-reference/functions/array-functions.md#array-functions-join) function splits the array into rows.
``` text
┌────────ta─┬─addressToLineWithInlines(arrayJoin(trace))───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 365497529 │ ['./build_normal_debug/./contrib/libcxx/include/string_view:252'] │
│ 365593602 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:191'] │
│ 365593866 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365592528 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365591003 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:477'] │
│ 365590479 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:442'] │
│ 365590600 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:457'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │
│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │
│ 365597289 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:807'] │
│ 365599840 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:1118'] │
│ 531058145 │ ['./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:152'] │
│ 531055181 │ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
│ 422333613 │ ['./build_normal_debug/./src/Functions/IFunctionAdaptors.h:21'] │
│ 586866022 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:216'] │
│ 586869053 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:264'] │
│ 586873237 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:334'] │
│ 597901620 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:601'] │
│ 597898534 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:718'] │
│ 630442912 │ ['./build_normal_debug/./src/Processors/Transforms/ExpressionTransform.cpp:23'] │
│ 546354050 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.h:38'] │
│ 626026993 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.cpp:89'] │
│ 626294022 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:45'] │
│ 626293730 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:63'] │
│ 626169525 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:213'] │
│ 626170308 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:178'] │
│ 626166348 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:329'] │
│ 626163461 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:84'] │
│ 626323536 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:85'] │
│ 626323277 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:112'] │
│ 626323133 │ ['./build_normal_debug/./contrib/libcxx/include/type_traits:3682'] │
│ 626323041 │ ['./build_normal_debug/./contrib/libcxx/include/tuple:1415'] │
└───────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## addressToSymbol {#addresstosymbol}
Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files.

View File

@ -22,7 +22,7 @@ tuple(x, y, …)
## tupleElement {#tupleelement}
A function that allows getting a column from a tuple.
N is the column index, starting from 1. N must be a constant. N must be a constant. N must be a strictly positive integer no greater than the size of the tuple.
N is the column index, starting from 1. N must be a constant. N must be a strictly positive integer no greater than the size of the tuple.
There is no cost to execute the function.
The function implements the operator `x.N`.
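For example (values here are arbitrary):

```sql
-- Returns 'b': the second element of the tuple.
SELECT tupleElement(('a', 'b', 'c'), 2) AS second_element;
```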

View File

@ -216,6 +216,17 @@ This is more optimal than using the normal IN. However, keep the following point
It also makes sense to specify a local table in the `GLOBAL IN` clause, in case this local table is only available on the requestor server and you want to use data from it on remote servers.
### Distributed Subqueries and max_rows_in_set
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.
This is especially important if the `global in` query returns a large amount of data. Consider the following SQL:
```sql
select * from table1 where col1 global in (select col1 from table2 where <some_predicate>)
```
If `some_predicate` is not selective enough, it will return a large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default), meaning that an exception is raised when these thresholds are met.
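As a sketch, the limits could be attached to such a query like this (table and column names are the placeholders from the example above):

```sql
-- Fail fast if the subquery produces more rows for the set than allowed.
SELECT *
FROM table1
WHERE col1 GLOBAL IN (SELECT col1 FROM table2)
SETTINGS max_rows_in_set = 1000000, set_overflow_mode = 'throw';
```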
### Distributed Subqueries and max_parallel_replicas {#max_parallel_replica-subqueries}
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:

View File

@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
## MATERIALIZE COLUMN {#materialize-column}
Materializes the column in the parts where the column is missing. This is useful in case of creating a new column with complicated `DEFAULT` or `MATERIALIZED` expression. Calculation of the column directly on `SELECT` query can cause bigger request execution time, so it is reasonable to use `MATERIALIZE COLUMN` for such columns. To perform same manipulation for existing column, use `FINAL` modifier.
Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
It is used when it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly during `SELECT` execution turns out to be expensive.
Syntax:
```sql
ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
ALTER TABLE table MATERIALIZE COLUMN col;
```
**Example**
@ -211,20 +212,34 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
DROP TABLE IF EXISTS tmp;
SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x);
┌─groupArray(x)─┬─groupArray(s)─────────┐
│ [0,1,2,3,4] │ ['0','1','2','3','4'] │
└───────────────┴───────────────────────┘
ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x));
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5;
SELECT groupArray(x), groupArray(s) FROM tmp;
```
**Result:**
┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │
└───────────────────────┴────────────────────────────────────────────────┘
```sql
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │
└───────────────────────┴───────────────────────────────────────────┘
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM tmp;
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │
└───────────────────────┴───────────────────────────────────────────────────────┘
```
**See Also**

View File

@ -172,6 +172,7 @@ Hierarchy of privileges:
- `SYSTEM FLUSH LOGS`
- [INTROSPECTION](#grant-introspection)
- `addressToLine`
- `addressToLineWithInlines`
- `addressToSymbol`
- `demangle`
- [SOURCES](#grant-sources)
@ -430,6 +431,7 @@ Allows using [introspection](../../operations/optimizing-performance/sampling-qu
- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS`
- `addressToLine`. Level: `GLOBAL`
- `addressToLineWithInlines`. Level: `GLOBAL`
- `addressToSymbol`. Level: `GLOBAL`
- `demangle`. Level: `GLOBAL`

View File

@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_
`WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings.
When `FROM const_expr` is not defined, the sequence of filling uses the minimal `expr` field value from `ORDER BY`.
When `TO const_expr` is not defined, the sequence of filling uses the maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type and as `seconds` for the DateTime type.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` is omitted, the sequence of filling uses `1.0` for numeric types, `1 day` for the Date type and `1 second` for the DateTime type.
Example of a query without `WITH FILL`:
@ -402,4 +402,85 @@ Result:
└────────────┴────────────┴──────────┘
```
The following query uses the `INTERVAL` data type of 1 day for each row filled in column `d1`:
``` sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d1 WITH FILL STEP INTERVAL 1 DAY,
d2 WITH FILL;
```
Result:
```
┌─────────d1─┬─────────d2─┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-12 │ 1970-01-01 │ │
│ 1970-01-13 │ 1970-01-01 │ │
│ 1970-01-14 │ 1970-01-01 │ │
│ 1970-01-15 │ 1970-01-01 │ │
│ 1970-01-16 │ 1970-01-01 │ │
│ 1970-01-17 │ 1970-01-01 │ │
│ 1970-01-18 │ 1970-01-01 │ │
│ 1970-01-19 │ 1970-01-01 │ │
│ 1970-01-20 │ 1970-01-01 │ │
│ 1970-01-21 │ 1970-01-01 │ │
│ 1970-01-22 │ 1970-01-01 │ │
│ 1970-01-23 │ 1970-01-01 │ │
│ 1970-01-24 │ 1970-01-01 │ │
│ 1970-01-25 │ 1970-01-01 │ │
│ 1970-01-26 │ 1970-01-01 │ │
│ 1970-01-27 │ 1970-01-01 │ │
│ 1970-01-28 │ 1970-01-01 │ │
│ 1970-01-29 │ 1970-01-01 │ │
│ 1970-01-30 │ 1970-01-01 │ │
│ 1970-01-31 │ 1970-01-01 │ │
│ 1970-02-01 │ 1970-01-01 │ │
│ 1970-02-02 │ 1970-01-01 │ │
│ 1970-02-03 │ 1970-01-01 │ │
│ 1970-02-04 │ 1970-01-01 │ │
│ 1970-02-05 │ 1970-01-01 │ │
│ 1970-02-06 │ 1970-01-01 │ │
│ 1970-02-07 │ 1970-01-01 │ │
│ 1970-02-08 │ 1970-01-01 │ │
│ 1970-02-09 │ 1970-01-01 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-02-11 │ 1970-01-01 │ │
│ 1970-02-12 │ 1970-01-01 │ │
│ 1970-02-13 │ 1970-01-01 │ │
│ 1970-02-14 │ 1970-01-01 │ │
│ 1970-02-15 │ 1970-01-01 │ │
│ 1970-02-16 │ 1970-01-01 │ │
│ 1970-02-17 │ 1970-01-01 │ │
│ 1970-02-18 │ 1970-01-01 │ │
│ 1970-02-19 │ 1970-01-01 │ │
│ 1970-02-20 │ 1970-01-01 │ │
│ 1970-02-21 │ 1970-01-01 │ │
│ 1970-02-22 │ 1970-01-01 │ │
│ 1970-02-23 │ 1970-01-01 │ │
│ 1970-02-24 │ 1970-01-01 │ │
│ 1970-02-25 │ 1970-01-01 │ │
│ 1970-02-26 │ 1970-01-01 │ │
│ 1970-02-27 │ 1970-01-01 │ │
│ 1970-02-28 │ 1970-01-01 │ │
│ 1970-03-01 │ 1970-01-01 │ │
│ 1970-03-02 │ 1970-01-01 │ │
│ 1970-03-03 │ 1970-01-01 │ │
│ 1970-03-04 │ 1970-01-01 │ │
│ 1970-03-05 │ 1970-01-01 │ │
│ 1970-03-06 │ 1970-01-01 │ │
│ 1970-03-07 │ 1970-01-01 │ │
│ 1970-03-08 │ 1970-01-01 │ │
│ 1970-03-09 │ 1970-01-01 │ │
│ 1970-03-10 │ 1970-01-01 │ │
│ 1970-03-11 │ 1970-01-01 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/) <!--hide-->

View File

@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623).
The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##}

View File

@ -872,3 +872,13 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
Если диск сконфигурирован как `cold`, данные будут переноситься в S3 при срабатывании правил TTL или когда свободное место на локальном диске станет меньше порогового значения, которое определяется как `move_factor * disk_size`.
## Виртуальные столбцы {#virtual-columns}
- `_part` — Имя куска.
- `_part_index` — Номер куска по порядку в результате запроса.
- `_partition_id` — Имя партиции.
- `_part_uuid` — Уникальный идентификатор куска (если включена MergeTree настройка `assign_part_uuids`).
- `_partition_value` — Значения (кортеж) выражения `partition by`.
- `_sample_factor` — Коэффициент сэмплирования (из запроса).

View File

@ -2119,7 +2119,7 @@ ClickHouse генерирует исключение:
- 1 — включен режим параллельного разбора.
- 0 — отключен режим параллельного разбора.
Значение по умолчанию: `0`.
Значение по умолчанию: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
@ -2130,7 +2130,7 @@ ClickHouse генерирует исключение:
- 1 — включен режим параллельного форматирования.
- 0 — отключен режим параллельного форматирования.
Значение по умолчанию: `0`.
Значение по умолчанию: `1`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

View File

@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
## MATERIALIZE COLUMN {#materialize-column}
Материализует столбец таблицы в кусках, в которых отсутствуют значения. Используется, если необходимо создать новый столбец со сложным материализованным выражением или выражением для заполнения по умолчанию (`DEFAULT`), потому как вычисление такого столбца прямо во время выполнения запроса `SELECT` оказывается ощутимо затратным. Чтобы совершить ту же операцию для существующего столбца, используйте модификатор `FINAL`.
Материализует или обновляет столбец таблицы с выражением для значения по умолчанию (`DEFAULT` или `MATERIALIZED`).
Используется, если необходимо добавить или обновить столбец со сложным выражением, потому как вычисление такого выражения прямо во время выполнения запроса `SELECT` оказывается ощутимо затратным.
Синтаксис:
```sql
ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
ALTER TABLE table MATERIALIZE COLUMN col;
```
**Пример**
@ -211,21 +212,39 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
DROP TABLE IF EXISTS tmp;
SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x);
┌─groupArray(x)─┬─groupArray(s)─────────┐
│ [0,1,2,3,4] │ ['0','1','2','3','4'] │
└───────────────┴───────────────────────┘
ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x));
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5;
SELECT groupArray(x), groupArray(s) FROM tmp;
┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │
└───────────────────────┴────────────────────────────────────────────────┘
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM tmp;
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │
└───────────────────────┴───────────────────────────────────────────────────────┘
```
**Результат:**
**Смотрите также**
```sql
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │
└───────────────────────┴───────────────────────────────────────────┘
```
- [MATERIALIZED](../../statements/create/table.md#materialized).
## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter}

View File

@ -14,7 +14,7 @@ toc_title: Introduction
- [MySQL](../../engines/database-engines/mysql.md)
- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
- [MaterializeMySQL](../../engines/database-engines/materialized-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)
@ -26,4 +26,6 @@ toc_title: Introduction
- [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md)
[来源文章](https://clickhouse.com/docs/en/database_engines/) <!--hide-->

View File

@ -1 +0,0 @@
../../../en/engines/database-engines/materialized-mysql.md

View File

@ -0,0 +1,274 @@
---
toc_priority: 29
toc_title: MaterializedMySQL
---
# [experimental] MaterializedMySQL {#materialized-mysql}
!!! warning "警告"
这是一个实验性的特性,不应该在生产中使用.
创建ClickHouse数据库包含MySQL中所有的表以及这些表中的所有数据。
ClickHouse服务器作为MySQL副本工作。它读取binlog并执行DDL和DML查询。
## 创建数据库 {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)]
```
**引擎参数**
- `host:port` — MySQL 服务地址.
- `database` — MySQL 数据库名称.
- `user` — MySQL 用户名.
- `password` — MySQL 用户密码.
**引擎配置**
- `max_rows_in_buffer` — 允许在内存中缓存数据的最大行数(对于单个表和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值:`65 505`。
- `max_bytes_in_buffer` - 允许在内存中缓存数据的最大字节数(对于单个表和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `1 048 576 `
- `max_rows_in_buffers` - 允许在内存中缓存数据的最大行数(用于数据库和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `65 505`
- `max_bytes_in_buffers` - 允许在内存中缓存数据的最大字节数(用于数据库和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `1 048 576`
- `max_flush_data_time` - 允许数据在内存中缓存的最大毫秒数(对于数据库和无法查询的缓存数据)。当超过这个时间,数据将被物化。默认值: `1000`。
- `max_wait_time_when_mysql_unavailable` - MySQL不可用时的重试间隔(毫秒)。负值禁用重试。默认值:`1000`。
- `allows_query_when_mysql_lost` - 允许在MySQL丢失时查询物化表。默认值:`0`(`false`)。
```sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
SETTINGS
allows_query_when_mysql_lost=true,
max_wait_time_when_mysql_unavailable=10000;
```
**MySQL服务器端配置**
为了`MaterializedMySQL`的正确工作,有一些必须设置的`MySQL`端配置设置:
- `default_authentication_plugin = mysql_native_password `,因为 `MaterializedMySQL` 只能授权使用该方法。
- `gtid_mode = on`因为基于GTID的日志记录是提供正确的 `MaterializedMySQL`复制的强制要求。
!!! attention "注意"
当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。
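As a hedged convenience, these values can be checked on the MySQL side with standard system variables before creating the database:
``` sql
SELECT @@gtid_mode, @@enforce_gtid_consistency, @@default_authentication_plugin;
```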
## 虚拟列 {#virtual-columns}
当使用 `MaterializedMySQL` 数据库引擎时,[ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) 表会与虚拟的 `_sign` 和 `_version` 列一起使用。
- `_version` — 事务版本. 类型 [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — 删除标记. 类型 [Int8](../../sql-reference/data-types/int-uint.md). 可能的值:
- `1` — 行没有删除,
- `-1` — 行已被删除.
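An illustrative query (database and table names are placeholders); referencing `_sign` and `_version` explicitly exposes the stored row versions, here filtered to rows recorded as deleted:
``` sql
SELECT *, _sign, _version
FROM mysql_db.some_table
WHERE _sign = -1          -- rows marked as deleted by replication
ORDER BY _version DESC
LIMIT 10;
```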
## 支持的数据类型 {#data_types-support}
| MySQL | ClickHouse |
|-------------------------|--------------------------------------------------------------|
| TINY | [Int8](../../sql-reference/data-types/int-uint.md) |
| SHORT | [Int16](../../sql-reference/data-types/int-uint.md) |
| INT24 | [Int32](../../sql-reference/data-types/int-uint.md) |
| LONG | [UInt32](../../sql-reference/data-types/int-uint.md) |
| LONGLONG | [UInt64](../../sql-reference/data-types/int-uint.md) |
| FLOAT | [Float32](../../sql-reference/data-types/float.md) |
| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) |
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) |
| TIME | [Int64](../../sql-reference/data-types/int-uint.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
| GEOMETRY | [String](../../sql-reference/data-types/string.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) |
| SET | [UInt64](../../sql-reference/data-types/int-uint.md) |
[Nullable](../../sql-reference/data-types/nullable.md) 已经被支持.
MySQL中的Time 类型会被ClickHouse转换成微秒来存储
不支持其他类型。如果MySQL表包含此类类型的列ClickHouse抛出异常"Unhandled data type"并停止复制。
## 规范和推荐用法 {#specifics-and-recommendations}
### 兼容性限制 {#compatibility-restrictions}
除了数据类型的限制之外,还有一些限制与`MySQL`数据库相比有所不同,这应该在复制之前解决:
- `MySQL` 中的每个表都应该包含 `PRIMARY KEY`
- 对于表的复制,那些包含 `ENUM` 字段值超出范围的行(在 `ENUM` 签名中指定)将不起作用。
### DDL Queries {#ddl-queries}
MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md))。如果 ClickHouse 无法解析某条 DDL 语句,该语句会被跳过。
### 数据复制 {#data-replication}
MaterializedMySQL 不支持直接的 `INSERT`、`DELETE` 和 `UPDATE` 查询。然而,它们在数据复制方面得到了支持:
- MySQL `INSERT`查询被转换为`_sign=1`的INSERT查询。
- MySQL `DELETE`查询被转换为`INSERT`,并且`_sign=-1`。
- 如果主键被修改,MySQL 的 `UPDATE` 查询会被转换为一条 `_sign=-1` 的 `INSERT` 加一条 `_sign=1` 的 `INSERT`;如果主键没有被修改,则转换为一条 `_sign=1` 的 `INSERT`。
### MaterializedMySQL 数据表查询 {#select}
`SELECT` 查询从 `MaterializedMySQL`表有一些细节:
- 如果在SELECT查询中没有指定 `_version`,则会使用 [FINAL](../../sql-reference/statements/select/from.md#select-from-final) 修饰符,所以对每个主键值只返回 `MAX(_version)` 对应的行。
- 如果在SELECT查询中没有指定 `_sign`,则默认使用 `WHERE _sign=1`,所以被删除的行不会包含在结果集中。
- 结果包括列注释以防MySQL数据库表中存在这些列注释。
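A hedged sketch of the difference (`mysql_db.some_table`, `key`, and `value` are placeholders):
``` sql
-- Implicit FINAL and `_sign = 1`: one live row per primary key.
SELECT key, value FROM mysql_db.some_table;

-- Referencing `_version` / `_sign` disables that rewriting and returns raw row versions.
SELECT key, value, _sign, _version FROM mysql_db.some_table;
```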
### 索引转换 {#index-conversion}
在ClickHouse表中MySQL的 `PRIMARY KEY``INDEX` 子句被转换为 `ORDER BY` 元组。
ClickHouse 只有一个物理排序,由 `ORDER BY` 子句决定。要创建一个新的物理排序,请使用 [materialized views](../../sql-reference/statements/create/view.md#materialized),如下面的示例所示。
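A hedged sketch of that materialized-view approach (all names are placeholders, and the view is created outside the MaterializedMySQL database):
``` sql
CREATE MATERIALIZED VIEW default.events_by_user
ENGINE = MergeTree
ORDER BY (user_id, ts)
AS SELECT user_id, ts, event
FROM mysql_db.events;
```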
**注意**
- `_sign=-1` 的行不会被物理地从表中删除。
- 级联 `UPDATE/DELETE` 查询不被 `MaterializedMySQL` 引擎支持,因为它们在 MySQL binlog 中是不可见的。
- 复制很容易被破坏。
- 禁止对数据库和表进行手工操作。
- `MaterializedMySQL` 受[optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert)设置的影响。当MySQL服务器中的一个表发生变化时数据会合并到 `MaterializedMySQL` 数据库中相应的表中。
### 表重写 {#table-overrides}
表覆盖可用于自定义ClickHouse DDL查询从而允许您对应用程序进行模式优化。这对于控制分区特别有用分区对MaterializedMySQL的整体性能非常重要。
这些是你可以对MaterializedMySQL表重写的模式转换操作:
* 修改列类型。必须与原始类型兼容,否则复制将失败。例如,可以将`UInt32`列修改为`UInt64`,不能将 `String` 列修改为 `Array(String)`
* 修改 [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl).
* 修改 [column compression codec](../../sql-reference/statements/create/table/#codecs).
* 增加 [ALIAS columns](../../sql-reference/statements/create/table/#alias).
* 增加 [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
* 增加 [projections](../table-engines/mergetree-family/mergetree/#projections).
请注意,当使用 `SELECT ... FINAL`(MaterializedMySQL 默认这样做)时,投影(projection)优化会被禁用,因此作用有限;[v21.12 博客文章](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)中描述的 `INDEX ... TYPE hypothesis` 在这种情况下可能更有用。
* 修改 [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/)
* 修改 [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* 修改 [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* 增加 [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* 增加 [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
[SETTINGS ...]
[TABLE OVERRIDE table_name (
[COLUMNS (
[col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...]
[INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...]
[PROJECTION projection_name (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
)]
[ORDER BY expr]
[PRIMARY KEY expr]
[PARTITION BY expr]
[SAMPLE BY expr]
[TTL expr]
), ...]
```
示例:
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
TABLE OVERRIDE table1 (
COLUMNS (
userid UUID,
category LowCardinality(String),
timestamp DateTime CODEC(Delta, Default)
)
PARTITION BY toYear(timestamp)
),
TABLE OVERRIDE table2 (
COLUMNS (
client_ip String TTL created + INTERVAL 72 HOUR
)
SAMPLE BY ip_hash
)
```
`COLUMNS`列表是稀疏的;根据指定修改现有列添加额外的ALIAS列。不可能添加普通列或实体化列。具有不同类型的已修改列必须可从原始类型赋值。在执行`CREATE DATABASE` 查询时,目前还没有验证这个或类似的问题,因此需要格外小心。
您可以为还不存在的表指定重写。
!!! warning "警告"
如果使用不当,表覆盖很容易导致复制中断。例如:
* 如果通过表覆盖添加了一个 ALIAS 列,而之后源 MySQL 表中又添加了一个同名的列,那么在 ClickHouse 中转换后的 ALTER TABLE 查询将失败并停止复制。
* 目前可以添加引用可空列的覆盖,而这些位置(例如 `ORDER BY` 或 `PARTITION BY`)要求非空列。这会导致 CREATE TABLE 查询失败,也会导致复制停止。
## 使用示例 {#examples-of-use}
MySQL 查询语句:
``` sql
mysql> CREATE DATABASE db;
mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT);
mysql> INSERT INTO db.test VALUES (1, 11), (2, 22);
mysql> DELETE FROM db.test WHERE a=1;
mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16);
mysql> UPDATE db.test SET c='Wow!', b=222;
mysql> SELECT * FROM test;
```
```text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```
ClickHouse中的数据库与MySQL服务器交换数据:
创建的数据库和表:
``` sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```
``` text
┌─name─┐
│ test │
└──────┘
```
数据插入之后:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬──b─┐
│ 1 │ 11 │
│ 2 │ 22 │
└───┴────┘
```
删除数据后,添加列并更新:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```
[来源文章](https://clickhouse.com/docs/en/engines/database-engines/materialized-mysql/) <!--hide-->

View File

@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — 远程数据库名称
- `user` — PostgreSQL用户名称
- `password` — PostgreSQL用户密码
- `schema` — PostgreSQL 模式(可选)
- `use_table_cache` — 定义是否缓存数据库表结构。可选。默认值: `0`.
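A hedged example using the optional `schema` argument (connection details are placeholders; the argument order follows the list above, with `schema` before `use_table_cache`):
``` sql
CREATE DATABASE postgres_db
ENGINE = PostgreSQL('postgres-host:5432', 'my_database', 'user', 'password', 'my_schema', 1);
```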
## 支持的数据类型 {#data_types-support}

View File

@ -31,6 +31,7 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
当创建数据库的新副本时,该副本会自己创建表。如果副本已经不可用很长一段时间,并且已经滞后于复制日志-它用ZooKeeper中的当前元数据检查它的本地元数据将带有数据的额外表移动到一个单独的非复制数据库(以免意外地删除任何多余的东西),创建缺失的表,如果表名已经被重命名,则更新表名。数据在`ReplicatedMergeTree`级别被复制,也就是说,如果表没有被复制,数据将不会被复制(数据库只负责元数据)。
允许[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md)查询,但不允许复制。数据库引擎将只向当前副本添加/获取/删除分区/部件。但是如果表本身使用了Replicated表引擎那么数据将在使用`ATTACH`后被复制。
## 使用示例 {#usage-example}
创建三台主机的集群:

View File

@ -1 +0,0 @@
../../../en/engines/database-engines/sqlite.md

View File

@ -0,0 +1,80 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
允许连接到[SQLite](https://www.sqlite.org/index.html)数据库并支持ClickHouse和SQLite交换数据 执行 `INSERT``SELECT` 查询。
## 创建一个数据库 {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**引擎参数**
- `db_path` — SQLite 数据库文件的路径.
## 数据类型的支持 {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## 技术细节和建议 {#specifics-and-recommendations}
SQLite将整个数据库(定义、表、索引和数据本身)存储为主机上的单个跨平台文件。在写入过程中SQLite会锁定整个数据库文件因此写入操作是顺序执行的。读操作可以是多任务的。
SQLite不需要服务管理(如启动脚本)或基于`GRANT`和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。
## 使用示例 {#usage-example}
数据库在ClickHouse连接到SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
展示数据表中的内容:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
从ClickHouse表插入数据到SQLite表:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -0,0 +1,416 @@
---
toc_priority: 4
toc_title: Hive
---
# Hive {#hive}
Hive引擎允许对HDFS Hive表执行 `SELECT` 查询。目前它支持如下输入格式:
- 文本:只支持简单的标量列类型,除了 `Binary`
- ORC:支持简单的标量列类型,除了`char`; 只支持 `array` 这样的复杂类型
- Parquet:支持所有简单标量列类型;只支持 `array` 这样的复杂类型
## 创建表 {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [ALIAS expr1],
name2 [type2] [ALIAS expr2],
...
) ENGINE = Hive('thrift://host:port', 'database', 'table')
PARTITION BY expr
```
查看[CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query)查询的详细描述。
表的结构可以与原来的Hive表结构有所不同:
- 列名应该与原来的Hive表相同但你可以使用这些列中的一些并以任何顺序你也可以使用一些从其他列计算的别名列。
- 列类型与原Hive表的列类型保持一致。
- “Partition by expression”应与原Hive表保持一致“Partition by expression”中的列应在表结构中。
**引擎参数**
- `thrift://host:port` — Hive Metastore 地址
- `database` — 远程数据库名.
- `table` — 远程数据表名.
## 使用示例 {#usage-example}
### 如何使用HDFS文件系统的本地缓存
我们强烈建议您为远程文件系统启用本地缓存。基准测试显示,如果使用缓存,它的速度会快两倍。
在使用缓存之前,请将其添加到 `config.xml`
``` xml
<local_cache_for_remote_fs>
<enable>true</enable>
<root_dir>local_cache</root_dir>
<limit_size>559096952</limit_size>
<bytes_read_before_flush>1048576</bytes_read_before_flush>
</local_cache_for_remote_fs>
```
- enable: 开启后ClickHouse将为HDFS (远程文件系统)维护本地缓存。
- root_dir: 必需的。用于存储远程文件系统的本地缓存文件的根目录。
- limit_size: 必需的。本地缓存文件的最大大小(单位为字节)。
- bytes_read_before_flush: 从远程文件系统下载文件时刷新到本地文件系统前的控制字节数。缺省值为1MB。
当ClickHouse为远程文件系统启用了本地缓存时,用户仍然可以选择在查询中设置 `use_local_cache_for_remote_fs = 0` 来不使用缓存。`use_local_cache_for_remote_fs` 默认为 `false`。
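For example, an individual query can opt out of the cache (a hedged sketch; `hive_table` is a placeholder):
``` sql
SELECT * FROM hive_table SETTINGS use_local_cache_for_remote_fs = 0;
```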
### 查询 ORC 输入格式的Hive 表
#### 在 Hive 中建表
``` text
hive > CREATE TABLE `test`.`test_orc`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_orc'
OK
Time taken: 0.51 seconds
hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_orc;
OK
1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.295 seconds, Fetched: 1 row(s)
```
#### 在 ClickHouse 中建表
ClickHouse中的表从上面创建的Hive表中获取数据:
``` sql
CREATE TABLE test.test_orc
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test.test_orc
SETTINGS input_format_orc_allow_missing_columns = 1
Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-04 04:00:44
f_date: 2021-12-03
f_string: hello world
f_varchar: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.078 sec.
```
### 查询 Parquet 输入格式的Hive 表
#### 在 Hive 中建表
``` text
hive >
CREATE TABLE `test`.`test_parquet`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_parquet'
OK
Time taken: 0.51 seconds
hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_parquet;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.766 seconds, Fetched: 1 row(s)
```
#### 在 ClickHouse 中建表
ClickHouse 中的表, 从上面创建的Hive表中获取数据:
``` sql
CREATE TABLE test.test_parquet
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test_parquet
SETTINGS input_format_parquet_allow_missing_columns = 1
Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 17:54:56
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.357 sec.
```
### 查询文本输入格式的Hive表
#### 在Hive 中建表
``` text
hive >
CREATE TABLE `test`.`test_text`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_text'
Time taken: 0.1 seconds, Fetched: 34 row(s)
hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_text;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.624 seconds, Fetched: 1 row(s)
```
#### 在 ClickHouse 中建表
ClickHouse中的表 从上面创建的Hive表中获取数据:
``` sql
CREATE TABLE test.test_text
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
```
``` text
SELECT *
FROM test.test_text
SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 18:11:17
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
day: 2021-09-18
```

View File

@ -19,3 +19,5 @@ ClickHouse 提供了多种方式来与外部系统集成,包括表引擎。像
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [SQLite](../../../engines/table-engines/integrations/sqlite.md)
- [Hive](../../../engines/table-engines/integrations/hive.md)

View File

@ -1,67 +1,62 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# 系统。query_thread_log {#system_tables-query_thread_log}
包含有关执行查询的线程的信息,例如,线程名称、线程开始时间、查询处理的持续时间。
始记录:
开启日志功能:
1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log)
2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 1。
1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 部分
2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 1。
数据的冲洗周期设置在 `flush_interval_milliseconds` 的参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 要强制冲洗,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。
数据从缓存写入数据表周期时间参数 `flush_interval_milliseconds` 位于 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。如果需要强制从缓存写入数据表,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询请求
ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/system-tables/index.md#system-tables-introduction) 欲了解更多详情
ClickHouse不会自动从表中删除数据。 欲了解更多详情,请参照 [介绍](../../operations/system-tables/index.md#system-tables-introduction)。
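A minimal sketch of forcing a flush and inspecting recent rows (the column names follow the list below):
``` sql
SYSTEM FLUSH LOGS;

SELECT event_time, thread_name, thread_id, query_duration_ms, read_rows
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```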
列:
- `event_date` ([日期](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread.
- `query` ([字符串](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- 1 — Query was initiated by the client.
- 0 — Query was initiated by another query for distributed query execution.
- `user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
- `event_date` ([日期](../../sql-reference/data-types/date.md)) — 该查询线程执行完成的日期。
- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 该查询线程执行完成的时间。
- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 查询的开始时间。
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询执行持续的时间。
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的行数。
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的字节数。
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的行数。 对于其他查询为0。
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的字节数。 对于其他查询,为0。
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在线程上下文,分配的内存和空闲内存之差。
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在线程上下文,分配的内存和空闲内存之差的最大值。
- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — 线程名。
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 内部线程ID。
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — 线程ID。
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS初始线程的初始ID。
- `query` ([字符串](../../sql-reference/data-types/string.md)) — 查询语句。
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询类型,可能的值:
- 1 — 由用户发起的查询。
- 0 — 由其他查询发起的分布式查询。
- `user` ([字符串](../../sql-reference/data-types/string.md)) — 发起查询的用户名。
- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — 查询的ID。
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起查询的IP地址。
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的端口。
- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的用户名(对于分布式查询)。
- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的ID对于分布式查询
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起该查询的父查询IP地址。
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起该查询的父查询端口。
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的界面,可能的值:
- 1 — TCP.
- 2 — HTTP.
- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md).
- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或者运行另一个TCP客户端
- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — The [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端名称。
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端版本。
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — 使用 [clickhouse-client](../../interfaces/cli.md) 的系统用户名。
- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — 运行 [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主机名
- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端名称。
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的修订号
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主版本号
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的次版本号
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的补丁版本
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的HTTP方法可能的值
- 0 — 查询通过TCP界面发起。
- 1 — `GET` 方法被使用。
- 2 — `POST` 方法被使用。
- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — The `UserAgent` http请求中传递的标头。
- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — The “quota key” 在指定 [配额](../../operations/quotas.md) 设置(见 `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events).
- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — `UserAgent` HTTP请求中传递的UA表头。
- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — “quota key” 在 [配额](../../operations/quotas.md) 设置`keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse 修订版本号.
- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — 对于该线程的多个指标计数器。这一项可以参考 [system.events](#system_tables-events).
**示例**
@ -113,4 +108,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr
**另请参阅**
- [系统。query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` 系统表,其中包含有关查询执行的公共信息。
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — `query_log` 系统表描述,其中包含有关查询执行的公共信息。
- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — 这个表包含在查询线程中使用的各个视图的信息。

View File

@ -1,17 +1,41 @@
# UInt8,UInt16,UInt32,UInt64,Int8,Int16,Int32,Int64 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64}
---
toc_priority: 40
toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---
# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
固定长度的整型,包括有符号整型或无符号整型。
创建表时,可以为整数设置类型参数 (例如. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), 但 ClickHouse 会忽略它们.
## 整型范围 {#int-ranges}
- Int8-\[-128:127\]
- Int16-\[-32768:32767\]
- Int32-\[-2147483648:2147483647\]
- Int64-\[-9223372036854775808:9223372036854775807\]
- `Int8` — \[-128 : 127\]
- `Int16` — \[-32768 : 32767\]
- `Int32` — \[-2147483648 : 2147483647\]
- `Int64` — \[-9223372036854775808 : 9223372036854775807\]
- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
别名:
- `Int8``TINYINT`, `BOOL`, `BOOLEAN`, `INT1`.
- `Int16``SMALLINT`, `INT2`.
- `Int32``INT`, `INT4`, `INTEGER`.
- `Int64``BIGINT`.
## 无符号整型范围 {#uint-ranges}
- UInt8-\[0:255\]
- UInt16-\[0:65535\]
- UInt32-\[0:4294967295\]
- UInt64-\[0:18446744073709551615\]
- `UInt8` — \[0 : 255\]
- `UInt16` — \[0 : 65535\]
- `UInt32` — \[0 : 4294967295\]
- `UInt64` — \[0 : 18446744073709551615\]
- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
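A small illustrative session showing the aliases and that the type parameters are ignored (a hedged sketch):
``` sql
CREATE TABLE ints_demo (t TINYINT(8), b BIGINT, u UInt256) ENGINE = Memory;
DESCRIBE TABLE ints_demo;               -- t is stored as Int8, b as Int64, u as UInt256

SELECT toTypeName(toInt128(-1)), toTypeName(toUInt256(1));
```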
[源文档](https://clickhouse.com/docs/en/data_types/int_uint/) <!--hide-->

View File

@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
`2021年Roadmap`已公布供公开讨论查看[这里](https://github.com/ClickHouse/ClickHouse/issues/17623).
`2022年Roadmap`已公布供公开讨论查看 [这里](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [源文章](https://clickhouse.com/docs/en/roadmap/) ##}

View File

@ -481,48 +481,76 @@ catch (...)
void Client::connect()
{
connection_parameters = ConnectionParameters(config());
if (is_interactive)
std::cout << "Connecting to "
<< (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at "
: "")
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
UInt16 default_port = ConnectionParameters::getPortFromConfig(config());
connection_parameters = ConnectionParameters(config(), hosts_ports[0].host,
hosts_ports[0].port.value_or(default_port));
String server_name;
UInt64 server_version_major = 0;
UInt64 server_version_minor = 0;
UInt64 server_version_patch = 0;
try
for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index)
{
connection = Connection::createConnection(connection_parameters, global_context);
connection_parameters.host = hosts_ports[attempted_address_index].host;
connection_parameters.port = hosts_ports[attempted_address_index].port.value_or(default_port);
if (max_client_network_bandwidth)
if (is_interactive)
std::cout << "Connecting to "
<< (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at "
: "")
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
try
{
ThrottlerPtr throttler = std::make_shared<Throttler>(max_client_network_bandwidth, 0, "");
connection->setThrottler(throttler);
}
connection = Connection::createConnection(connection_parameters, global_context);
connection->getServerVersion(
connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision);
}
catch (const Exception & e)
{
/// It is typical when users install ClickHouse, type some password and instantly forget it.
if ((connection_parameters.user.empty() || connection_parameters.user == "default")
&& e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED)
if (max_client_network_bandwidth)
{
ThrottlerPtr throttler = std::make_shared<Throttler>(max_client_network_bandwidth, 0, "");
connection->setThrottler(throttler);
}
connection->getServerVersion(
connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision);
config().setString("host", connection_parameters.host);
config().setInt("port", connection_parameters.port);
break;
}
catch (const Exception & e)
{
std::cerr << std::endl
<< "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl
<< "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl
<< "and deleting this file will reset the password." << std::endl
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl
<< std::endl;
}
/// It is typical when users install ClickHouse, type some password and instantly forget it.
/// This problem can't be fixed with reconnection so it is not attempted
if ((connection_parameters.user.empty() || connection_parameters.user == "default")
&& e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED)
{
std::cerr << std::endl
<< "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl
<< "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl
<< "and deleting this file will reset the password." << std::endl
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl
<< std::endl;
throw;
}
else
{
if (attempted_address_index == hosts_ports.size() - 1)
throw;
throw;
if (is_interactive)
{
std::cerr << "Connection attempt to database at "
<< connection_parameters.host << ":" << connection_parameters.port
<< " resulted in failure"
<< std::endl
<< getExceptionMessage(e, false)
<< std::endl
<< "Attempting connection to the next provided address"
<< std::endl;
}
}
}
}
server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch);
@ -966,8 +994,11 @@ void Client::addOptions(OptionsDescription & options_description)
/// Main commandline options related to client functionality and all parameters from Settings.
options_description.main_description->add_options()
("config,c", po::value<std::string>(), "config-file path (another shorthand)")
("host,h", po::value<std::string>()->default_value("localhost"), "server host")
("port", po::value<int>()->default_value(9000), "server port")
("host,h", po::value<std::vector<HostPort>>()->multitoken()->default_value({{"localhost"}}, "localhost"),
"list of server hosts with optionally assigned port to connect. List elements are separated by a space."
"Every list element looks like '<host>[:<port>]'. If port isn't assigned, connection is made by port from '--port' param"
"Example of usage: '-h host1:1 host2 host3:3'")
("port", po::value<int>()->default_value(9000), "server port, which is default port for every host from '--host' param")
("secure,s", "Use TLS connection")
("user,u", po::value<std::string>()->default_value("default"), "user")
/** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
@ -1074,8 +1105,8 @@ void Client::processOptions(const OptionsDescription & options_description,
if (options.count("config"))
config().setString("config-file", options["config"].as<std::string>());
if (options.count("host") && !options["host"].defaulted())
config().setString("host", options["host"].as<std::string>());
if (options.count("host"))
hosts_ports = options["host"].as<std::vector<HostPort>>();
if (options.count("interleave-queries-file"))
interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
if (options.count("port") && !options["port"].defaulted())

View File

@ -57,8 +57,16 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
@ -149,7 +157,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
ParserQuery parser(end);
do
{
ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
/// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail,
/// should throw exception early and make exception message more readable.
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
@ -222,6 +231,5 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
std::cerr << getCurrentExceptionMessage(true) << '\n';
return getCurrentExceptionCode();
}
return 0;
}

View File

@ -313,9 +313,15 @@ void LocalServer::cleanup()
}
static bool checkIfStdinIsRegularFile()
{
struct stat file_stat;
return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file"))
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || !config().has("query")))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
@ -337,8 +343,9 @@ std::string LocalServer::getInitialCreateTableQuery()
format_from_file_name = FormatFactory::instance().getFormatFromFileName(file_name, false);
}
auto data_format
= backQuoteIfNeed(config().getString("table-data-format", format_from_file_name.empty() ? "TSV" : format_from_file_name));
auto data_format = backQuoteIfNeed(
config().getString("table-data-format", config().getString("format", format_from_file_name.empty() ? "TSV" : format_from_file_name)));
if (table_structure == "auto")
table_structure = "";
@ -518,22 +525,17 @@ void LocalServer::processConfig()
if (config().has("multiquery"))
is_multiquery = true;
load_suggestions = true;
}
else
{
if (delayed_interactive)
{
load_suggestions = true;
}
need_render_progress = config().getBool("progress", false);
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
is_multiquery = true;
}
print_stack_trace = config().getBool("stacktrace", false);
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false);
auto logging = (config().has("logger.console")
|| config().has("logger.level")

View File

@ -22,6 +22,7 @@
#include <base/getMemoryAmount.h>
#include <base/errnoToString.h>
#include <base/coverage.h>
#include <Common/MemoryTracker.h>
#include <Common/ClickHouseRevision.h>
#include <Common/DNSResolver.h>
#include <Common/CurrentMetrics.h>
@ -925,6 +926,14 @@ if (ThreadFuzzer::instance().isEffective())
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
if (config->has("global_memory_usage_overcommit_max_wait_microseconds"))
{
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 0);
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
}
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
// in a lot of places. For now, disable updating log configuration without server restart.
//setTextLog(global_context->getTextLog());

View File

@ -217,13 +217,12 @@
<!-- The following file is used only if ssl_require_client_auth=1 -->
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
<!-- Default compression algorithm (applied if client doesn't specify another algorithm, see result_compression in QueryInfo).
<!-- Default transport compression type (can be overridden by client, see the transport_compression_type field in QueryInfo).
Supported algorithms: none, deflate, gzip, stream_gzip -->
<compression>deflate</compression>
<transport_compression_type>none</transport_compression_type>
<!-- Default compression level (applied if client doesn't specify another level, see result_compression in QueryInfo).
Supported levels: none, low, medium, high -->
<compression_level>medium</compression_level>
<!-- Default transport compression level. Supported levels: 0..3 -->
<transport_compression_level>0</transport_compression_level>
<!-- Send/receive message size limits in bytes. -1 means unlimited -->
<max_send_message_size>-1</max_send_message_size>

View File

@ -31,7 +31,7 @@ namespace ErrorCodes
* If test-mode option is added, files will be put by given url via PUT request.
*/
void processFile(const fs::path & file_path, const fs::path & dst_path, bool test_mode, WriteBuffer & metadata_buf)
void processFile(const fs::path & file_path, const fs::path & dst_path, bool test_mode, bool link, WriteBuffer & metadata_buf)
{
String remote_path;
RE2::FullMatch(file_path.string(), EXTRACT_PATH_PATTERN, &remote_path);
@ -52,22 +52,29 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes
auto dst_file_path = fs::path(dst_path) / remote_path;
auto src_buf = createReadBufferFromFileBase(file_path, {}, fs::file_size(file_path));
std::shared_ptr<WriteBuffer> dst_buf;
/// test mode for integration tests.
if (test_mode)
dst_buf = std::make_shared<WriteBufferFromHTTP>(Poco::URI(dst_file_path), Poco::Net::HTTPRequest::HTTP_PUT);
if (link)
{
fs::create_symlink(file_path, dst_file_path);
}
else
dst_buf = std::make_shared<WriteBufferFromFile>(dst_file_path);
{
auto src_buf = createReadBufferFromFileBase(file_path, {}, fs::file_size(file_path));
std::shared_ptr<WriteBuffer> dst_buf;
copyData(*src_buf, *dst_buf);
dst_buf->next();
dst_buf->finalize();
/// test mode for integration tests.
if (test_mode)
dst_buf = std::make_shared<WriteBufferFromHTTP>(Poco::URI(dst_file_path), Poco::Net::HTTPRequest::HTTP_PUT);
else
dst_buf = std::make_shared<WriteBufferFromFile>(dst_file_path);
copyData(*src_buf, *dst_buf);
dst_buf->next();
dst_buf->finalize();
}
};
void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_mode)
void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_mode, bool link)
{
std::cerr << "Data path: " << data_path << ", destination path: " << dst_path << std::endl;
@ -94,7 +101,7 @@ void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_
{
if (dir_it->is_directory())
{
processFile(dir_it->path(), dst_path, test_mode, *root_meta);
processFile(dir_it->path(), dst_path, test_mode, link, *root_meta);
String directory_prefix;
RE2::FullMatch(dir_it->path().string(), EXTRACT_PATH_PATTERN, &directory_prefix);
@ -115,14 +122,14 @@ void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_
fs::directory_iterator files_end;
for (fs::directory_iterator file_it(dir_it->path()); file_it != files_end; ++file_it)
processFile(file_it->path(), dst_path, test_mode, *directory_meta);
processFile(file_it->path(), dst_path, test_mode, link, *directory_meta);
directory_meta->next();
directory_meta->finalize();
}
else
{
processFile(dir_it->path(), dst_path, test_mode, *root_meta);
processFile(dir_it->path(), dst_path, test_mode, link, *root_meta);
}
}
root_meta->next();
@ -141,6 +148,7 @@ try
("help,h", "produce help message")
("metadata-path", po::value<std::string>(), "Metadata path (select data_paths from system.tables where name='table_name'")
("test-mode", "Use test mode, which will put data on given url via PUT")
("link", "Create symlinks instead of copying")
("url", po::value<std::string>(), "Web server url for test mode")
("output-dir", po::value<std::string>(), "Directory to put files in non-test mode");
@ -186,7 +194,7 @@ try
root_path = fs::current_path();
}
processTableFiles(fs_path, root_path, test_mode);
processTableFiles(fs_path, root_path, test_mode, options.count("link"));
return 0;
}

View File

@ -1,8 +1,8 @@
#include <Access/AccessRights.h>
#include <base/logger_useful.h>
#include <base/sort.h>
#include <boost/container/small_vector.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/sort.hpp>
#include <unordered_map>
namespace DB
@ -101,7 +101,7 @@ namespace
AccessRightsElements getResult() const
{
ProtoElements sorted = *this;
boost::range::sort(sorted);
::sort(sorted.begin(), sorted.end());
AccessRightsElements res;
res.reserve(sorted.size());

View File

@ -86,7 +86,7 @@ enum class AccessType
M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables;
implicitly enabled by the grant CREATE_TABLE on any table */ \
M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
@ -94,7 +94,7 @@ enum class AccessType
M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views;
implicitly enabled by the grant DROP_TABLE */\
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \
@ -113,9 +113,9 @@ enum class AccessType
M(ALTER_ROLE, "", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_ROLE, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ROLE_ADMIN, "", GLOBAL, ACCESS_MANAGEMENT) /* allows to grant and revoke the roles which are not granted to the current user with admin option */\
M(CREATE_ROW_POLICY, "CREATE POLICY", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_ROW_POLICY, "ALTER POLICY", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_ROW_POLICY, "DROP POLICY", GLOBAL, ACCESS_MANAGEMENT) \
M(CREATE_ROW_POLICY, "CREATE POLICY", TABLE, ACCESS_MANAGEMENT) \
M(ALTER_ROW_POLICY, "ALTER POLICY", TABLE, ACCESS_MANAGEMENT) \
M(DROP_ROW_POLICY, "DROP POLICY", TABLE, ACCESS_MANAGEMENT) \
M(CREATE_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \
@ -124,7 +124,7 @@ enum class AccessType
M(DROP_SETTINGS_PROFILE, "DROP PROFILE", GLOBAL, ACCESS_MANAGEMENT) \
M(SHOW_USERS, "SHOW CREATE USER", GLOBAL, SHOW_ACCESS) \
M(SHOW_ROLES, "SHOW CREATE ROLE", GLOBAL, SHOW_ACCESS) \
M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", GLOBAL, SHOW_ACCESS) \
M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \
M(SHOW_QUOTAS, "SHOW CREATE QUOTA", GLOBAL, SHOW_ACCESS) \
M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \
M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \
@ -166,6 +166,7 @@ enum class AccessType
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\
\
M(addressToLine, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLine() */\
M(addressToLineWithInlines, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLineWithInlines() */\
M(addressToSymbol, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToSymbol() */\
M(demangle, "", GLOBAL, INTROSPECTION) /* allows to execute function demangle() */\
M(INTROSPECTION, "INTROSPECTION FUNCTIONS", GROUP, ALL) /* allows to execute functions addressToLine(), addressToSymbol(), demangle()*/\

View File

@ -425,6 +425,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args
| AccessType::TRUNCATE;
const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY;
const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION;
const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl;
const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE;
const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS;
@ -432,7 +433,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args
const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE;
const AccessFlags ddl_flags = table_ddl | dictionary_ddl;
const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl;
const AccessFlags introspection_flags = AccessType::INTROSPECTION;
};
static const PrecalculatedFlags precalc;

View File

@ -7,8 +7,8 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm/sort.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
#include <base/sort.h>
namespace DB
@ -132,7 +132,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toAST() const
ast->names.reserve(ids.size());
for (const UUID & id : ids)
ast->names.emplace_back(::DB::toString(id));
boost::range::sort(ast->names);
::sort(ast->names.begin(), ast->names.end());
}
if (!except_ids.empty())
@ -140,7 +140,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toAST() const
ast->except_names.reserve(except_ids.size());
for (const UUID & except_id : except_ids)
ast->except_names.emplace_back(::DB::toString(except_id));
boost::range::sort(ast->except_names);
::sort(ast->except_names.begin(), ast->except_names.end());
}
return ast;
@ -161,7 +161,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toASTWithNames(const Access
if (name)
ast->names.emplace_back(std::move(*name));
}
boost::range::sort(ast->names);
::sort(ast->names.begin(), ast->names.end());
}
if (!except_ids.empty())
@ -173,7 +173,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toASTWithNames(const Access
if (except_name)
ast->except_names.emplace_back(std::move(*except_name));
}
boost::range::sort(ast->except_names);
::sort(ast->except_names.begin(), ast->except_names.end());
}
return ast;

View File

@ -45,7 +45,15 @@ TEST(AccessRights, Union)
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
ASSERT_EQ(lhs.toString(),
"GRANT INSERT ON *.*, "
"GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, "
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, "
"TRUNCATE, OPTIMIZE, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}

View File

@ -90,11 +90,11 @@ struct AggregateFunctionIntervalLengthSumData
void sort()
{
if (!sorted)
{
::sort(std::begin(segments), std::end(segments));
sorted = true;
}
if (sorted)
return;
::sort(std::begin(segments), std::end(segments));
sorted = true;
}
void serialize(WriteBuffer & buf) const

View File

@ -75,11 +75,11 @@ struct AggregateFunctionSequenceMatchData final
void sort()
{
if (!sorted)
{
::sort(std::begin(events_list), std::end(events_list), Comparator{});
sorted = true;
}
if (sorted)
return;
::sort(std::begin(events_list), std::end(events_list), Comparator{});
sorted = true;
}
void serialize(WriteBuffer & buf) const

View File

@ -239,6 +239,7 @@ private:
UInt64 genRandom(size_t lim)
{
assert(lim > 0);
/// With a large number of values, we will generate random numbers several times slower.
if (lim <= static_cast<UInt64>(rng.max()))
return static_cast<UInt32>(rng()) % static_cast<UInt32>(lim);
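Aside: the guard above keeps the common case to a single cheap 32-bit draw; only when lim exceeds the generator's range does a wider and, as the comment says, several times slower path kick in. A rough sketch of that idea, not the actual implementation (the helper name and the fallback are assumptions, and modulo bias is ignored):

#include <cstdint>
#include <random>

inline uint64_t gen_random_below(std::minstd_rand & rng, uint64_t lim)
{
    /// Fast path: one 32-bit draw is enough when the bound fits.
    if (lim <= static_cast<uint64_t>(rng.max()))
        return static_cast<uint32_t>(rng()) % static_cast<uint32_t>(lim);

    /// Slow path: compose two draws to cover a wider range.
    const uint64_t hi = rng();
    const uint64_t lo = rng();
    return ((hi << 32) | lo) % lim;
}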

View File

@ -260,7 +260,8 @@ private:
if (sorted)
return;
::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
/// In order to provide a deterministic result we must sort by value and hash
::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs < rhs; });
sorted = true;
}
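Aside (made-up sample data): comparing the whole (value, hash) pair means ties on equal values are broken by the hash, so the sorted order no longer depends on the order in which the samples arrived:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

int main()
{
    std::vector<std::pair<double, uint64_t>> samples{{1.0, 42}, {1.0, 7}, {0.5, 99}};
    std::sort(samples.begin(), samples.end());   // result: (0.5, 99), (1.0, 7), (1.0, 42)
}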

View File

@ -79,6 +79,7 @@ set(dbms_sources)
add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
@ -508,6 +509,10 @@ if (TARGET ch_contrib::bzip2)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2)
endif()
if (TARGET ch_contrib::minizip)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip)
endif ()
if (TARGET ch_contrib::simdjson)
dbms_target_link_libraries(PRIVATE ch_contrib::simdjson)
endif()

View File

@ -1317,7 +1317,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert = global_context->getSettings().async_insert && insert && insert->hasInlinedData();
bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
/// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert)
@ -1501,6 +1501,25 @@ String ClientBase::prompt() const
}
void ClientBase::initQueryIdFormats()
{
if (!query_id_formats.empty())
return;
/// Initialize query_id_formats if any
if (config().has("query_id_formats"))
{
Poco::Util::AbstractConfiguration::Keys keys;
config().keys("query_id_formats", keys);
for (const auto & name : keys)
query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name));
}
if (query_id_formats.empty())
query_id_formats.emplace_back("Query id:", " {query_id}\n");
}
void ClientBase::runInteractive()
{
if (config().has("query_id"))
@ -1508,6 +1527,8 @@ void ClientBase::runInteractive()
if (print_time_to_stderr)
throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
initQueryIdFormats();
/// Initialize DateLUT here to avoid counting time spent here as query execution time.
const auto local_tz = DateLUT::instance().getTimeZone();
@ -1528,18 +1549,6 @@ void ClientBase::runInteractive()
home_path = home_path_cstr;
}
/// Initialize query_id_formats if any
if (config().has("query_id_formats"))
{
Poco::Util::AbstractConfiguration::Keys keys;
config().keys("query_id_formats", keys);
for (const auto & name : keys)
query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name));
}
if (query_id_formats.empty())
query_id_formats.emplace_back("Query id:", " {query_id}\n");
/// Load command history if present.
if (config().has("history_file"))
history_file = config().getString("history_file");
@ -1648,6 +1657,9 @@ void ClientBase::runInteractive()
void ClientBase::runNonInteractive()
{
if (delayed_interactive)
initQueryIdFormats();
if (!queries_files.empty())
{
auto process_multi_query_from_file = [&](const String & file)
@ -1917,7 +1929,7 @@ void ClientBase::init(int argc, char ** argv)
/// Output of help message.
if (options.count("help")
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
|| (options.count("host") && options["host"].as<std::vector<HostPort>>()[0].host == "elp")) /// If user writes -help instead of --help.
{
printHelpMessage(options_description);
exit(0);

View File

@ -5,6 +5,7 @@
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
#include <Common/Stopwatch.h>
#include <Common/DNSResolver.h>
#include <Core/ExternalTable.h>
#include <Poco/Util/Application.h>
#include <Interpreters/Context.h>
@ -138,6 +139,8 @@ private:
void updateSuggest(const ASTCreateQuery & ast_create);
void initQueryIdFormats();
protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
@ -241,6 +244,25 @@ protected:
} profile_events;
QueryProcessingStage::Enum query_processing_stage;
struct HostPort
{
String host;
std::optional<UInt16> port{};
friend std::istream & operator>>(std::istream & in, HostPort & hostPort)
{
String host_with_port;
in >> host_with_port;
DB::DNSResolver & resolver = DB::DNSResolver::instance();
std::pair<Poco::Net::IPAddress, std::optional<UInt16>>
host_and_port = resolver.resolveHostOrAddress(host_with_port);
hostPort.host = host_and_port.first.toString();
hostPort.port = host_and_port.second;
return in;
}
};
std::vector<HostPort> hosts_ports{};
};
}
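Aside: the stream extractor on HostPort is what lets boost::program_options parse repeated --host values into a vector. A self-contained sketch of that mechanism with a simplified, hypothetical Endpoint type (no DNS resolution here):

#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>

struct Endpoint
{
    std::string host;
    friend std::istream & operator>>(std::istream & in, Endpoint & e) { return in >> e.host; }
};

int main(int argc, char ** argv)
{
    namespace po = boost::program_options;
    po::options_description desc("options");
    desc.add_options()("host", po::value<std::vector<Endpoint>>()->multitoken(), "host[:port]");
    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    if (vm.count("host"))
        for (const auto & e : vm["host"].as<std::vector<Endpoint>>())
            std::cout << e.host << '\n';
}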

View File

@ -23,15 +23,13 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config)
ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config,
std::string connection_host,
int connection_port) : host(connection_host), port(connection_port)
{
bool is_secure = config.getBool("secure", false);
security = is_secure ? Protocol::Secure::Enable : Protocol::Secure::Disable;
host = config.getString("host", "localhost");
port = config.getInt(
"port", config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
default_database = config.getString("database", "");
/// changed the default value to "default" to fix the issue when the user in the prompt is blank
@ -61,12 +59,25 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
/// By default compression is disabled if address looks like localhost.
compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host)))
? Protocol::Compression::Enable : Protocol::Compression::Disable;
? Protocol::Compression::Enable : Protocol::Compression::Disable;
timeouts = ConnectionTimeouts(
Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
}
ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config)
: ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config))
{
}
int ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config)
{
bool is_secure = config.getBool("secure", false);
return config.getInt("port",
config.getInt(is_secure ? "tcp_port_secure" : "tcp_port",
is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
}
}

View File

@ -24,6 +24,9 @@ struct ConnectionParameters
ConnectionParameters() {}
ConnectionParameters(const Poco::Util::AbstractConfiguration & config);
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, int port);
static int getPortFromConfig(const Poco::Util::AbstractConfiguration & config);
};
}
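Aside, an illustrative (not taken from the diff) use of the new overloads:

/// int port = ConnectionParameters::getPortFromConfig(config);   // "port", else tcp_port[_secure], else the defaults
/// ConnectionParameters parameters(config, "127.0.0.1", port);   // explicit host together with the resolved port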

View File

@ -74,6 +74,8 @@ void LocalConnection::sendQuery(
query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); });
query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); });
}
if (!current_database.empty())
query_context->setCurrentDatabase(current_database);
CurrentThread::QueryScope query_scope_holder(query_context);
@ -427,9 +429,9 @@ void LocalConnection::getServerVersion(
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
}
void LocalConnection::setDefaultDatabase(const String &)
void LocalConnection::setDefaultDatabase(const String & database)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
current_database = database;
}
UInt64 LocalConnection::getServerRevision(const ConnectionTimeouts &)

View File

@ -142,5 +142,7 @@ private:
/// Last "server" packet.
std::optional<UInt64> next_packet_type;
String current_database;
};
}

View File

@ -50,12 +50,12 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
if (!offsets_concrete)
throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR);
if (!offsets_concrete->empty() && nested_column)
if (!offsets_concrete->empty() && data)
{
Offset last_offset = offsets_concrete->getData().back();
/// This will also prevent possible overflow in offset.
if (nested_column->size() != last_offset)
if (data->size() != last_offset)
throw Exception("offsets_column has data inconsistent with nested_column", ErrorCodes::LOGICAL_ERROR);
}

20 src/Common/ArenaUtils.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <string.h>
#include <string>
#include <base/StringRef.h>
/** Copy string value into Arena.
* Arena should support method:
* char * alloc(size_t size).
*/
template <typename Arena>
inline StringRef copyStringInArena(Arena & arena, StringRef value)
{
size_t key_size = value.size;
char * place_for_key = arena.alloc(key_size);
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value.data), key_size);
StringRef result{place_for_key, key_size};
return result;
}
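Aside: a minimal sketch of an arena that satisfies the documented requirement (a char * alloc(size_t) method), only to show how copyStringInArena is meant to be called; ToyArena is a made-up name:

#include <cstddef>
#include <vector>

struct ToyArena
{
    std::vector<std::vector<char>> chunks;

    char * alloc(size_t size)
    {
        chunks.emplace_back(size);
        return chunks.back().data();
    }
};

/// ToyArena arena;
/// StringRef copy = copyStringInArena(arena, StringRef{"hello", 5});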

View File

@ -387,47 +387,52 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
}
template <typename Data>
ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
ALWAYS_INLINE FindResult findKey(Data & data, size_t row_, Arena & pool)
{
size_t row = getIndexAt(row_);
if (is_nullable && row == 0)
{
if constexpr (has_mapped)
return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData(), 0);
else
return FindResult(data.hasNullKeyData());
return FindResult(data.hasNullKeyData(), 0);
}
if (visit_cache[row] != VisitValue::Empty)
{
if constexpr (has_mapped)
return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found, 0);
else
return FindResult(visit_cache[row] == VisitValue::Found);
return FindResult(visit_cache[row] == VisitValue::Found, 0);
}
auto key_holder = getKeyHolder(row_, pool);
typename Data::iterator it;
typename Data::LookupResult it;
if (saved_hash)
it = data.find(*key_holder, saved_hash[row]);
it = data.find(keyHolderGetKey(key_holder), saved_hash[row]);
else
it = data.find(*key_holder);
it = data.find(keyHolderGetKey(key_holder));
bool found = it != data.end();
bool found = it;
visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
if constexpr (has_mapped)
{
if (found)
mapped_cache[row] = it->second;
mapped_cache[row] = it->getMapped();
}
size_t offset = 0;
if constexpr (FindResult::has_offset)
offset = found ? data.offsetInternal(it) : 0;
if constexpr (has_mapped)
return FindResult(&mapped_cache[row], found);
return FindResult(&mapped_cache[row], found, offset);
else
return FindResult(found);
return FindResult(found, offset);
}
template <typename Data>

View File

@ -202,6 +202,45 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U
return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
}
std::pair<Poco::Net::IPAddress, std::optional<UInt16>> DNSResolver::resolveHostOrAddress(const std::string & host_and_port)
{
Poco::Net::IPAddress ip;
size_t number_of_colons = std::count(host_and_port.begin(), host_and_port.end(), ':');
if (number_of_colons > 1)
{
/// IPv6 host
if (host_and_port.starts_with('['))
{
size_t close_bracket_pos = host_and_port.find(']');
assert(close_bracket_pos != std::string::npos);
ip = resolveHost(host_and_port.substr(0, close_bracket_pos));
if (close_bracket_pos == host_and_port.size() - 1)
return {ip, std::nullopt};
if (host_and_port[close_bracket_pos + 1] != ':')
throw Exception("Missing delimiter between host and port", ErrorCodes::BAD_ARGUMENTS);
unsigned int port;
if (!Poco::NumberParser::tryParseUnsigned(host_and_port.substr(close_bracket_pos + 2), port))
throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS);
if (port > 0xFFFF)
throw Exception("Port must be less 0xFFFF", ErrorCodes::BAD_ARGUMENTS);
return {ip, port};
}
return {resolveHost(host_and_port), std::nullopt};
}
else if (number_of_colons == 1)
{
/// IPv4 host with port
Poco::Net::SocketAddress socket = resolveAddress(host_and_port);
return {socket.host(), socket.port()};
}
/// IPv4 host
return {resolveHost(host_and_port), std::nullopt};
}
String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address)
{
if (impl->disable_cache)

View File

@ -34,6 +34,10 @@ public:
Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
/// Accepts host names like 'example.com'/'example.com:port' or '127.0.0.1'/'127.0.0.1:port' or '::1'/'[::1]:port'
/// and resolves its IP and port, if port is set
std::pair<Poco::Net::IPAddress, std::optional<UInt16>> resolveHostOrAddress(const std::string & host_and_port);
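/// For example (illustrative only):
///   resolveHostOrAddress("example.com")     -> {ip, std::nullopt}
///   resolveHostOrAddress("127.0.0.1:9000")  -> {ip, 9000}
///   resolveHostOrAddress("[::1]:9000")      -> {ip, 9000}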
/// Accepts host IP and resolves its host name
String reverseResolve(const Poco::Net::IPAddress & address);

View File

@ -610,6 +610,8 @@
M(639, SNAPPY_COMPRESS_FAILED) \
M(640, NO_HIVEMETASTORE) \
M(641, CANNOT_APPEND_TO_FILE) \
M(642, CANNOT_PACK_ARCHIVE) \
M(643, CANNOT_UNPACK_ARCHIVE) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -1,6 +1,7 @@
#include "MemoryTracker.h"
#include <IO/WriteHelpers.h>
#include <Common/VariableContext.h>
#include <Interpreters/TraceCollector.h>
#include <Common/Exception.h>
#include <Common/LockMemoryExceptionInThread.h>
@ -8,6 +9,7 @@
#include <Common/formatReadable.h>
#include <Common/ProfileEvents.h>
#include <Common/thread_local_rng.h>
#include <Common/OvercommitTracker.h>
#include <base/logger_useful.h>
#include <atomic>
@ -95,7 +97,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const
}
void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker)
{
if (size < 0)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size);
@ -104,7 +106,8 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
{
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->allocImpl(size, throw_if_memory_exceeded);
loaded_next->allocImpl(size, throw_if_memory_exceeded,
level == VariableContext::Process ? this : query_tracker);
return;
}
@ -186,18 +189,30 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded)
{
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
const auto * description = description_ptr.load(std::memory_order_relaxed);
throw DB::Exception(
DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED,
"Memory limit{}{} exceeded: would use {} (attempt to allocate chunk of {} bytes), maximum: {}",
description ? " " : "",
description ? description : "",
formatReadableSizeWithBinarySuffix(will_be),
size,
formatReadableSizeWithBinarySuffix(current_hard_limit));
bool need_to_throw = true;
bool try_to_free_memory = overcommit_tracker != nullptr && query_tracker != nullptr;
if (try_to_free_memory)
need_to_throw = overcommit_tracker->needToStopQuery(query_tracker);
if (need_to_throw)
{
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
const auto * description = description_ptr.load(std::memory_order_relaxed);
throw DB::Exception(
DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED,
"Memory limit{}{} exceeded: would use {} (attempt to allocate chunk of {} bytes), maximum: {}",
description ? " " : "",
description ? description : "",
formatReadableSizeWithBinarySuffix(will_be),
size,
formatReadableSizeWithBinarySuffix(current_hard_limit));
}
else
{
will_be = amount.load(std::memory_order_relaxed);
}
}
bool peak_updated;
@ -221,7 +236,8 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
}
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->allocImpl(size, throw_if_memory_exceeded);
loaded_next->allocImpl(size, throw_if_memory_exceeded,
level == VariableContext::Process ? this : query_tracker);
}
void MemoryTracker::alloc(Int64 size)
@ -302,10 +318,23 @@ void MemoryTracker::free(Int64 size)
}
OvercommitRatio MemoryTracker::getOvercommitRatio()
{
return { amount.load(std::memory_order_relaxed), soft_limit.load(std::memory_order_relaxed) };
}
OvercommitRatio MemoryTracker::getOvercommitRatio(Int64 limit)
{
return { amount.load(std::memory_order_relaxed), limit };
}
void MemoryTracker::resetCounters()
{
amount.store(0, std::memory_order_relaxed);
peak.store(0, std::memory_order_relaxed);
soft_limit.store(0, std::memory_order_relaxed);
hard_limit.store(0, std::memory_order_relaxed);
profiler_limit.store(0, std::memory_order_relaxed);
}
@ -330,6 +359,12 @@ void MemoryTracker::set(Int64 to)
}
void MemoryTracker::setSoftLimit(Int64 value)
{
soft_limit.store(value, std::memory_order_relaxed);
}
void MemoryTracker::setHardLimit(Int64 value)
{
hard_limit.store(value, std::memory_order_relaxed);

View File

@ -28,6 +28,9 @@ extern thread_local bool memory_tracker_always_throw_logical_error_on_allocation
#define ALLOW_ALLOCATIONS_IN_SCOPE static_assert(true)
#endif
struct OvercommitRatio;
struct OvercommitTracker;
/** Tracks memory consumption.
* It throws an exception if amount of consumed memory become greater than certain limit.
* The same memory tracker could be simultaneously used in different threads.
@ -40,6 +43,7 @@ class MemoryTracker
private:
std::atomic<Int64> amount {0};
std::atomic<Int64> peak {0};
std::atomic<Int64> soft_limit {0};
std::atomic<Int64> hard_limit {0};
std::atomic<Int64> profiler_limit {0};
@ -61,6 +65,8 @@ private:
/// This description will be used as prefix into log messages (if isn't nullptr)
std::atomic<const char *> description_ptr = nullptr;
OvercommitTracker * overcommit_tracker = nullptr;
bool updatePeak(Int64 will_be, bool log_memory_usage);
void logMemoryUsage(Int64 current) const;
@ -83,7 +89,7 @@ public:
void allocNoThrow(Int64 size);
void allocImpl(Int64 size, bool throw_if_memory_exceeded);
void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
void realloc(Int64 old_size, Int64 new_size)
{
@ -108,8 +114,14 @@ public:
return peak.load(std::memory_order_relaxed);
}
void setSoftLimit(Int64 value);
void setHardLimit(Int64 value);
Int64 getSoftLimit() const
{
return soft_limit.load(std::memory_order_relaxed);
}
/** Set limit if it was not set.
* Otherwise, set limit to new value, if new value is greater than previous limit.
*/
@ -159,6 +171,14 @@ public:
description_ptr.store(description, std::memory_order_relaxed);
}
OvercommitRatio getOvercommitRatio();
OvercommitRatio getOvercommitRatio(Int64 limit);
void setOvercommitTracker(OvercommitTracker * tracker) noexcept
{
overcommit_tracker = tracker;
}
/// Reset the accumulated data
void resetCounters();

View File

@ -0,0 +1,119 @@
#include "OvercommitTracker.h"
#include <chrono>
#include <mutex>
#include <Interpreters/ProcessList.h>
using namespace std::chrono_literals;
OvercommitTracker::OvercommitTracker()
: max_wait_time(0us)
, picked_tracker(nullptr)
, cancelation_state(QueryCancelationState::NONE)
{}
void OvercommitTracker::setMaxWaitTime(UInt64 wait_time)
{
std::lock_guard guard(overcommit_m);
max_wait_time = wait_time * 1us;
}
bool OvercommitTracker::needToStopQuery(MemoryTracker * tracker)
{
std::unique_lock<std::mutex> lk(overcommit_m);
pickQueryToExclude();
assert(cancelation_state == QueryCancelationState::RUNNING);
// If no query was chosen, we need to stop the current query.
// This may happen if no soft limit is set.
if (picked_tracker == nullptr)
{
cancelation_state = QueryCancelationState::NONE;
return true;
}
if (picked_tracker == tracker)
return true;
return !cv.wait_for(lk, max_wait_time, [this]()
{
return cancelation_state == QueryCancelationState::NONE;
});
}
void OvercommitTracker::unsubscribe(MemoryTracker * tracker)
{
std::unique_lock<std::mutex> lk(overcommit_m);
if (picked_tracker == tracker)
{
LOG_DEBUG(getLogger(), "Picked query stopped");
picked_tracker = nullptr;
cancelation_state = QueryCancelationState::NONE;
cv.notify_all();
}
}
UserOvercommitTracker::UserOvercommitTracker(DB::ProcessListForUser * user_process_list_)
: user_process_list(user_process_list_)
{}
void UserOvercommitTracker::pickQueryToExcludeImpl()
{
MemoryTracker * query_tracker = nullptr;
OvercommitRatio current_ratio{0, 0};
// At this moment query list must be read only.
// BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
auto & queries = user_process_list->queries;
LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", queries.size());
for (auto const & query : queries)
{
if (query.second->isKilled())
continue;
auto * memory_tracker = query.second->getMemoryTracker();
if (!memory_tracker)
continue;
auto ratio = memory_tracker->getOvercommitRatio();
LOG_DEBUG(logger, "Query has ratio {}/{}", ratio.committed, ratio.soft_limit);
if (ratio.soft_limit != 0 && current_ratio < ratio)
{
query_tracker = memory_tracker;
current_ratio = ratio;
}
}
LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}",
current_ratio.committed, current_ratio.soft_limit);
picked_tracker = query_tracker;
}
void GlobalOvercommitTracker::pickQueryToExcludeImpl()
{
MemoryTracker * query_tracker = nullptr;
OvercommitRatio current_ratio{0, 0};
process_list->processEachQueryStatus([&](DB::QueryStatus const & query)
{
if (query.isKilled())
return;
Int64 user_soft_limit = 0;
if (auto const * user_process_list = query.getUserProcessList())
user_soft_limit = user_process_list->user_memory_tracker.getSoftLimit();
if (user_soft_limit == 0)
return;
auto * memory_tracker = query.getMemoryTracker();
if (!memory_tracker)
return;
auto ratio = memory_tracker->getOvercommitRatio(user_soft_limit);
LOG_DEBUG(logger, "Query has ratio {}/{}", ratio.committed, ratio.soft_limit);
if (current_ratio < ratio)
{
query_tracker = memory_tracker;
current_ratio = ratio;
}
});
LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}",
current_ratio.committed, current_ratio.soft_limit);
picked_tracker = query_tracker;
}

View File

@ -0,0 +1,155 @@
#pragma once
#include <base/logger_useful.h>
#include <base/types.h>
#include <boost/core/noncopyable.hpp>
#include <Poco/Logger.h>
#include <cassert>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <unordered_map>
// This struct is used for the comparison of query memory usage.
struct OvercommitRatio
{
OvercommitRatio(Int64 committed_, Int64 soft_limit_)
: committed(committed_)
, soft_limit(soft_limit_)
{}
friend bool operator<(OvercommitRatio const & lhs, OvercommitRatio const & rhs) noexcept
{
// (a / b < c / d) <=> (a * d < c * b)
return (lhs.committed * rhs.soft_limit) < (rhs.committed * lhs.soft_limit)
|| (lhs.soft_limit == 0 && rhs.soft_limit > 0)
|| (lhs.committed == 0 && rhs.committed == 0 && lhs.soft_limit > rhs.soft_limit);
}
// actual query memory usage
Int64 committed;
// guaranteed amount of memory query can use
Int64 soft_limit;
};
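// Worked example (illustrative, not part of this header): for 3/10 vs 2/5 the
// cross-multiplication gives 3*5 = 15 < 2*10 = 20, so 3/10 < 2/5; no division
// is performed, so a soft_limit of 0 cannot cause division by zero.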
class MemoryTracker;
// Usually it is hard to set a reasonable hard memory limit
// (especially its default value). This class introduces a new
// mechanism for limiting memory usage.
// The soft limit represents the guaranteed amount of memory a query/user
// may use. It is allowed to exceed this limit. But if the hard limit
// is reached, the query with the biggest overcommit ratio
// is killed to free memory.
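//
// Illustrative numbers (not from this commit): with per-query soft limits of 2 GiB,
// a query using 6 GiB has overcommit ratio 6/2 = 3 and is preferred for cancellation
// over a query using 3 GiB (ratio 3/2 = 1.5) once the hard limit is reached.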
struct OvercommitTracker : boost::noncopyable
{
OvercommitTracker();
void setMaxWaitTime(UInt64 wait_time);
bool needToStopQuery(MemoryTracker * tracker);
void unsubscribe(MemoryTracker * tracker);
virtual ~OvercommitTracker() = default;
protected:
virtual void pickQueryToExcludeImpl() = 0;
mutable std::mutex overcommit_m;
mutable std::condition_variable cv;
std::chrono::microseconds max_wait_time;
enum class QueryCancelationState
{
NONE,
RUNNING,
};
// Specifies the memory tracker of the query chosen to stop.
// If no soft limit is set, all queries which reach the hard limit must stop.
// This case is represented by the picked tracker pointer being nullptr while
// the overcommit tracker is in the RUNNING state.
MemoryTracker * picked_tracker;
QueryCancelationState cancelation_state;
virtual Poco::Logger * getLogger() = 0;
private:
void pickQueryToExclude()
{
if (cancelation_state != QueryCancelationState::RUNNING)
{
pickQueryToExcludeImpl();
cancelation_state = QueryCancelationState::RUNNING;
}
}
friend struct BlockQueryIfMemoryLimit;
};
namespace DB
{
class ProcessList;
struct ProcessListForUser;
}
struct UserOvercommitTracker : OvercommitTracker
{
explicit UserOvercommitTracker(DB::ProcessListForUser * user_process_list_);
~UserOvercommitTracker() override = default;
protected:
void pickQueryToExcludeImpl() override final;
Poco::Logger * getLogger() override final { return logger; }
private:
DB::ProcessListForUser * user_process_list;
Poco::Logger * logger = &Poco::Logger::get("UserOvercommitTracker");
};
struct GlobalOvercommitTracker : OvercommitTracker
{
explicit GlobalOvercommitTracker(DB::ProcessList * process_list_)
: process_list(process_list_)
{}
~GlobalOvercommitTracker() override = default;
protected:
void pickQueryToExcludeImpl() override final;
Poco::Logger * getLogger() override final { return logger; }
private:
DB::ProcessList * process_list;
Poco::Logger * logger = &Poco::Logger::get("GlobalOvercommitTracker");
};
// UserOvercommitTracker needs to check the whole list of the user's queries
// to pick one to stop. The BlockQueryIfMemoryLimit struct allows waiting until
// query selection is finished. It is used in ProcessList to keep the user's query
// list immutable while UserOvercommitTracker reads it.
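//
// Illustrative usage (names assumed, roughly as in ProcessList):
//     BlockQueryIfMemoryLimit block(user_overcommit_tracker);
//     user_queries.emplace(...);   // safe: query selection cannot run concurrently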
struct BlockQueryIfMemoryLimit
{
BlockQueryIfMemoryLimit(OvercommitTracker const & overcommit_tracker)
: mutex(overcommit_tracker.overcommit_m)
, lk(mutex)
{
if (overcommit_tracker.cancelation_state == OvercommitTracker::QueryCancelationState::RUNNING)
{
overcommit_tracker.cv.wait_for(lk, overcommit_tracker.max_wait_time, [&overcommit_tracker]()
{
return overcommit_tracker.cancelation_state == OvercommitTracker::QueryCancelationState::NONE;
});
}
}
~BlockQueryIfMemoryLimit() = default;
private:
std::mutex & mutex;
std::unique_lock<std::mutex> lk;
};

View File

@ -281,6 +281,10 @@
M(ExternalDataSourceLocalCacheReadBytes, "Bytes read from local cache buffer in RemoteReadBufferCache")\
\
M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
\
M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \
M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely")
namespace ProfileEvents
{

View File

@ -17,6 +17,7 @@
#cmakedefine01 USE_YAML_CPP
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_BZIP2
#cmakedefine01 USE_MINIZIP
#cmakedefine01 USE_SNAPPY
#cmakedefine01 USE_HIVE
#cmakedefine01 USE_ODBC

View File

@ -105,7 +105,7 @@ void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t o
/// We will discard our working_buffer, but have to account rest bytes
bytes += offset();
/// No data, everything discarded
pos = working_buffer.end();
resetWorkingBuffer();
owned_cell.reset();
/// Remember required offset in decompressed block which will be set in

View File

@ -80,7 +80,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t
/// We will discard our working_buffer, but have to account rest bytes
bytes += offset();
/// No data, everything discarded
pos = working_buffer.end();
resetWorkingBuffer();
size_compressed = 0;
/// Remember required offset in decompressed block which will be set in
/// the next ReadBuffer::next() call
@ -113,7 +113,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
/// need to skip some bytes in decompressed data (seek happened before readBig call).
if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
{
decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
bytes_read += size_decompressed;
bytes += size_decompressed;

View File

@ -286,10 +286,7 @@ RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForS
for (const auto & [session_id, request] : requests_for_sessions)
entries.push_back(getZooKeeperLogEntry(session_id, request));
{
std::lock_guard lock(append_entries_mutex);
return raft_instance->append_entries(entries);
}
return raft_instance->append_entries(entries);
}
bool KeeperServer::isLeader() const

View File

@ -28,8 +28,6 @@ private:
nuraft::ptr<nuraft::asio_service> asio_service;
nuraft::ptr<nuraft::rpc_listener> asio_listener;
std::mutex append_entries_mutex;
std::mutex initialized_mutex;
std::atomic<bool> initialized_flag = false;
std::condition_variable initialized_cv;

Some files were not shown because too many files have changed in this diff.