Merge branch 'master' into ldap-role-mapping

* master: (159 commits)
  Review fix.
  Update version_date.tsv after release 20.8.7.15
  wrong translation
  Update version_date.tsv after release 20.9.6.14
  Update version_date.tsv after release 20.10.5.10
  Update version_date.tsv after release 20.11.4.13
  Improvements in coverage images
  Fixed a problem with the translation of the document
  final_parallel
  final_parallel
  DOCSUP-4162: Document the system.replicated_fetches system table (#16900)
  Update settings.md
  Update settings.md
  Less verbose logging when fetch is impossible
  Don't add tons of client coverage files in stateful tests with coverage
  More compatible watches in TestKeeper
  Trying to make read_in_order_many_parts more stable
  trigger CI
  Update version_date.tsv after release 20.6.10.2
  Update visibleWidth.cpp
  ...
commit 1a587b0c21 by Denis Glazachev, 2020-11-22 00:58:55 +04:00
469 changed files with 5569 additions and 2541 deletions

.gitmodules vendored
View File

@@ -196,7 +196,7 @@
 [submodule "contrib/rocksdb"]
 	path = contrib/rocksdb
 	url = https://github.com/facebook/rocksdb
-	branch = v6.11.4
+	branch = v6.14.5
 [submodule "contrib/xz"]
 	path = contrib/xz
 	url = https://github.com/xz-mirror/xz

View File

@@ -5,6 +5,9 @@
 /// (See at http://www.boost.org/LICENSE_1_0.txt)
 #include "throwError.h"
+#include <cfloat>
+#include <limits>
+#include <cassert>
 namespace wide
 {
@@ -192,7 +195,7 @@ struct integer<Bits, Signed>::_impl
     }

     template <typename T>
-    constexpr static auto to_Integral(T f) noexcept __attribute__((no_sanitize("undefined")))
+    constexpr static auto to_Integral(T f) noexcept
     {
         if constexpr (std::is_same_v<T, __int128>)
             return f;
@@ -225,25 +228,54 @@ struct integer<Bits, Signed>::_impl
             self.items[i] = 0;
     }

-    constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept
-    {
-        if ((rhs > 0 && rhs < std::numeric_limits<uint64_t>::max()) || (rhs < 0 && rhs > std::numeric_limits<int64_t>::min()))
+    /**
+     * N.B. t is constructed from double, so max(t) = max(double) ~ 2^310
+     * the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
+     *
+     * t = a1 * max_int + b1,   a1 > max_int, b1 < max_int
+     * a1 = a2 * max_int + b2,  a2 > max_int, b2 < max_int
+     * a_(n - 1) = a_n * max_int + b2, a_n <= max_int  <- base case.
+     */
+    template <class T>
+    constexpr static void set_multiplier(integer<Bits, Signed> & self, T t) noexcept
+    {
+        constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
+        const T alpha = t / max_int;
+
+        if (alpha <= max_int)
+            self = static_cast<uint64_t>(alpha);
+        else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
+            set_multiplier<double>(self, alpha);
+
+        self *= max_int;
+        self += static_cast<uint64_t>(t - alpha * max_int); // += b_i
+    }
+
+    constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept
+    {
+        constexpr int64_t max_int = std::numeric_limits<int64_t>::max();
+        constexpr int64_t min_int = std::numeric_limits<int64_t>::min();
+
+        /// There are values in int64 that have more than 53 significant bits (in terms of double
+        /// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up,
+        /// the result may not fit in 64 bits.
+        /// The example of such a number is 9.22337e+18.
+        /// As to_Integral does a static_cast to int64_t, it may result in UB.
+        /// The necessary check here is that long double has enough significant (mantissa) bits to store the
+        /// int64_t max value precisely.
+        static_assert(LDBL_MANT_DIG >= 64,
+                      "On your system long double has less than 64 precision bits,"
+                      "which may result in UB when initializing double from int64_t");
+
+        if ((rhs > 0 && rhs < max_int) || (rhs < 0 && rhs > min_int))
         {
-            self = to_Integral(rhs);
+            self = static_cast<int64_t>(rhs);
             return;
         }

-        long double r = rhs;
-        if (r < 0)
-            r = -r;
-
-        size_t count = r / std::numeric_limits<uint64_t>::max();
-        self = count;
-        self *= std::numeric_limits<uint64_t>::max();
-        long double to_diff = count;
-        to_diff *= std::numeric_limits<uint64_t>::max();
-
-        self += to_Integral(r - to_diff);
+        const long double rhs_long_double = (static_cast<long double>(rhs) < 0)
+            ? -static_cast<long double>(rhs)
+            : rhs;
+
+        set_multiplier(self, rhs_long_double);

         if (rhs < 0)
             self = -self;
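To make the reasoning in the new comments concrete, here is a self-contained sketch (hypothetical demo code, not part of the patch) showing both the 53-bit rounding hazard that motivates the `static_assert` and the base-`max_int` decomposition performed by `set_multiplier`:

```cpp
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Peel t = alpha * max_int + b until the quotient fits into uint64_t,
// printing each base-max_int "digit" (the b_i from the comment above).
static void decompose(long double t)
{
    constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
    if (t > static_cast<long double>(max_int))
    {
        const long double alpha = floorl(t / max_int);  // integral quotient
        decompose(alpha);
        printf(" * max_int + %llu", static_cast<unsigned long long>(t - alpha * max_int));
    }
    else
        printf("%llu", static_cast<unsigned long long>(t));
}

int main()
{
    static_assert(LDBL_MANT_DIG >= 64, "long double too narrow for this demo");

    // int64_t max has 63 significant bits; a double's 53-bit mantissa rounds
    // it *up* to 2^63, which no longer fits back into int64_t.
    double d = 9223372036854775807.0;
    printf("%.0f\n", d);   // prints 9223372036854775808 on IEEE-754 doubles

    decompose(1e30L);      // 1e30 needs two base-max_int digits
    printf("\n");
}
```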

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.

+OWNER(g:clickhouse)
+
 LIBRARY()

 ADDINCL(
View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()

 ADDINCL(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()

 NO_COMPILER_WARNINGS()

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()

 PEERDIR(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()

 ADDINCL (GLOBAL clickhouse/base/pcg-random)

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()

 CFLAGS(-g0)

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()

 ADDINCL(GLOBAL clickhouse/base/widechar_width)

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 RECURSE(
     common
     daemon

contrib/libunwind vendored

@@ -1 +1 @@
-Subproject commit 198458b35f100da32bd3e74c2a3ce8d236db299b
+Subproject commit 7d78d3618910752c256b2b58c3895f4efea47fac

contrib/rocksdb vendored

@@ -1 +1 @@
-Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a
+Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd

View File

@@ -347,8 +347,9 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
+    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
-    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc
+    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
     ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
     ${ROCKSDB_SOURCE_DIR}/db/builder.cc
     ${ROCKSDB_SOURCE_DIR}/db/c.cc
@@ -394,6 +395,8 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc
     ${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc
     ${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc
+    ${ROCKSDB_SOURCE_DIR}/db/output_validator.cc
+    ${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc
     ${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc
     ${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc
     ${ROCKSDB_SOURCE_DIR}/db/repair.cc
@@ -451,12 +454,12 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc
     ${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc
     ${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc
-    ${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc
     ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
     ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
     ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
     ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
     ${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
+    ${ROCKSDB_SOURCE_DIR}/options/configurable.cc
     ${ROCKSDB_SOURCE_DIR}/options/db_options.cc
     ${ROCKSDB_SOURCE_DIR}/options/options.cc
     ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
@@ -507,6 +510,7 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc
     ${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc
     ${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc
+    ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
     ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
     ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
     ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
@@ -515,6 +519,7 @@ set(SOURCES
     ${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
     ${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
     ${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
+    ${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc
     ${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
     ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
     ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc

View File

@@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build
 ENV OUTPUT_DIR=/output
 ENV IGNORE='.*contrib.*'

-CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \
+RUN apt-get update && apt-get install cmake --yes --no-install-recommends
+
+CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \
     dpkg -i /package_folder/clickhouse-common-static_*.deb; \
-    llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
+    llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
-    llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
+    llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
     genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR}

View File

@@ -287,6 +287,8 @@ TESTS_TO_SKIP=(
     01322_ttest_scipy

     01545_system_errors
+    # Checks system.errors
+    01563_distributed_query_finish
 )

 time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@@ -1,12 +1,12 @@
 # docker build -t yandex/clickhouse-stateful-test-with-coverage .
-FROM yandex/clickhouse-stateless-test
+FROM yandex/clickhouse-stateless-test-with-coverage

 RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list

 RUN apt-get update -y \
     && env DEBIAN_FRONTEND=noninteractive \
         apt-get install --yes --no-install-recommends \
-            python3-requests
+            python3-requests procps psmisc

 COPY s3downloader /s3downloader
 COPY run.sh /run.sh

View File

@@ -1,40 +1,44 @@
 #!/bin/bash

 kill_clickhouse () {
-    kill "$(pgrep -u clickhouse)" 2>/dev/null
+    echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
+    pkill -f "clickhouse-server" 2>/dev/null

-    for _ in {1..10}
+    for _ in {1..120}
     do
-        if ! kill -0 "$(pgrep -u clickhouse)"; then
-            echo "No clickhouse process"
-            break
-        else
-            echo "Process $(pgrep -u clickhouse) still alive"
-            sleep 10
-        fi
+        if ! pkill -0 -f "clickhouse-server" ; then break ; fi
+        echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
+        sleep 1
     done
+
+    if pkill -0 -f "clickhouse-server"
+    then
+        pstree -apgT
+        jobs
+        echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
+        return 1
+    fi
 }

 start_clickhouse () {
     LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
-}
-
-wait_llvm_profdata () {
-    while kill -0 "$(pgrep llvm-profdata-10)"
+    counter=0
+    until clickhouse-client --query "SELECT 1"
     do
-        echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
-        sleep 3
+        if [ "$counter" -gt 120 ]
+        then
+            echo "Cannot start clickhouse-server"
+            cat /var/log/clickhouse-server/stdout.log
+            tail -n1000 /var/log/clickhouse-server/stderr.log
+            tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
+            break
+        fi
+        sleep 0.5
+        counter=$((counter + 1))
     done
 }

-merge_client_files_in_background () {
-    client_files=$(ls /client_*profraw 2>/dev/null)
-    if [ -n "$client_files" ]
-    then
-        llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw"
-        rm "$client_files"
-    fi
-}
-
 chmod 777 /
@@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/
 # install test configs
 /usr/share/clickhouse-test/config/install.sh

-function start()
-{
-    counter=0
-    until clickhouse-client --query "SELECT 1"
-    do
-        if [ "$counter" -gt 120 ]
-        then
-            echo "Cannot start clickhouse-server"
-            cat /var/log/clickhouse-server/stdout.log
-            tail -n1000 /var/log/clickhouse-server/stderr.log
-            tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
-            break
-        fi
-        timeout 120 service clickhouse-server start
-        sleep 0.5
-        counter=$((counter + 1))
-    done
-}
-
-start
+start_clickhouse

 # shellcheck disable=SC2086 # No quotes because I want to split it into words.
 if ! /s3downloader --dataset-names $DATASETS; then
@@ -81,25 +66,20 @@ fi
 chmod 777 -R /var/lib/clickhouse

-while /bin/true; do
-    merge_client_files_in_background
-    sleep 2
-done &
-
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES"
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test"

 kill_clickhouse
 start_clickhouse

-sleep 10
-
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"

 if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
     SKIP_LIST_OPT="--use-skip-list"
@@ -109,15 +89,10 @@ fi
 # more idiologically correct.
 read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"

-LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
+LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

 kill_clickhouse

-wait_llvm_profdata
-
 sleep 3
-
-wait_llvm_profdata # 100% merged all parts

 cp /*.profraw /profraw ||:

View File

@@ -29,7 +29,7 @@ def dowload_with_progress(url, path):
     logging.info("Downloading from %s to temp path %s", url, path)
     for i in range(RETRIES_COUNT):
         try:
-            with open(path, 'w') as f:
+            with open(path, 'wb') as f:
                 response = requests.get(url, stream=True)
                 response.raise_for_status()
                 total_length = response.headers.get('content-length')
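For context, the corrected pattern in isolation — a minimal sketch, not the project's s3downloader itself: `requests` streams raw bytes, so the target file must be opened in binary mode.

```python
import requests

def download(url: str, path: str) -> None:
    """Stream a URL to disk. The file must be opened with 'wb', since
    response.iter_content yields bytes; text mode ('w') would raise
    TypeError in Python 3 or corrupt data via newline translation."""
    with open(path, 'wb') as f:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=1 << 16):
            f.write(chunk)
```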

View File

@@ -1,4 +1,4 @@
-# docker build -t yandex/clickhouse-stateless-with-coverage-test .
+# docker build -t yandex/clickhouse-stateless-test-with-coverage .
 # TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs)
 FROM yandex/clickhouse-test-base
@@ -28,7 +28,9 @@ RUN apt-get update -y \
     lsof \
     unixodbc \
     wget \
-    qemu-user-static
+    qemu-user-static \
+    procps \
+    psmisc

 RUN mkdir -p /tmp/clickhouse-odbc-tmp \
     && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \

View File

@@ -2,27 +2,41 @@

 kill_clickhouse () {
     echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
-    kill "$(pgrep -u clickhouse)" 2>/dev/null
+    pkill -f "clickhouse-server" 2>/dev/null

-    for _ in {1..10}
+    for _ in {1..120}
     do
-        if ! kill -0 "$(pgrep -u clickhouse)"; then
-            echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
-            break
-        else
-            echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
-            sleep 10
-        fi
+        if ! pkill -0 -f "clickhouse-server" ; then break ; fi
+        echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
+        sleep 1
     done

-    echo "Will try to send second kill signal for sure"
-    kill "$(pgrep -u clickhouse)" 2>/dev/null
-    sleep 5
-    echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
+    if pkill -0 -f "clickhouse-server"
+    then
+        pstree -apgT
+        jobs
+        echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
+        return 1
+    fi
 }

 start_clickhouse () {
     LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
+    counter=0
+    until clickhouse-client --query "SELECT 1"
+    do
+        if [ "$counter" -gt 120 ]
+        then
+            echo "Cannot start clickhouse-server"
+            cat /var/log/clickhouse-server/stdout.log
+            tail -n1000 /var/log/clickhouse-server/stderr.log
+            tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
+            break
+        fi
+        sleep 0.5
+        counter=$((counter + 1))
+    done
 }

 chmod 777 /
@@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/

 start_clickhouse

-sleep 10
-
 if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
     SKIP_LIST_OPT="--use-skip-list"
 fi

View File

@@ -35,7 +35,7 @@ RUN apt-get update \
 ENV TZ=Europe/Moscow
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

-RUN pip3 install urllib3 testflows==1.6.62 docker-compose docker dicttoxml kazoo tzlocal
+RUN pip3 install urllib3 testflows==1.6.65 docker-compose docker dicttoxml kazoo tzlocal

 ENV DOCKER_CHANNEL stable
 ENV DOCKER_VERSION 17.09.1-ce

View File

@@ -2317,4 +2317,10 @@ Possible values:
 Default value: `1`.

+## output_format_tsv_null_representation {#output_format_tsv_null_representation}
+
+Allows configurable `NULL` representation for the [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls the output format; `\N` is the only supported `NULL` representation for the TSV input format.
+
+Default value: `\N`.
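A minimal usage sketch for the new setting (the query and its output are illustrative, not part of the patch):

``` sql
SET output_format_tsv_null_representation = 'NULL';
SELECT NULL AS x FORMAT TSV;
-- prints: NULL   (instead of the default \N)
```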
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@@ -0,0 +1,70 @@
# system.replicated_fetches {#system_tables-replicated_fetches}
Contains information about currently running background fetches.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the background fetch started.
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the background fetch.
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the background fetch.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
**Example**
``` sql
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
database: default
table: t
elapsed: 7.243039876
progress: 0.41832135995612835
result_part_name: all_0_0_0
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
partition_id: all
total_size_bytes_compressed: 1052783726
bytes_read_compressed: 440401920
source_replica_path: /clickhouse/test/t/replicas/1
source_replica_hostname: node1
source_replica_port: 9009
interserver_scheme: http
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
to_detached: 0
thread_id: 54
```
**See Also**
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->

View File

@@ -0,0 +1,381 @@
---
toc_priority: 67
toc_title: Encryption
---
# Encryption functions {#encryption-functions}
These functions implement encryption and decryption of data with the AES (Advanced Encryption Standard) algorithm.
Key length depends on the encryption mode. It is 16, 24, and 32 bytes long for `-128-`, `-192-`, and `-256-` modes respectively.
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
Note that these functions work slowly.
## encrypt {#encrypt}
This function encrypts data using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
**Syntax**
``` sql
encrypt('mode', 'plaintext', 'key' [, iv, aad])
```
**Parameters**
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for others it would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
│ aes-256-ctr │ │
│ aes-256-ctr │ 7FB039F7 │
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
└─────────────┴───────────────────────────────────────────────┘
```
Example with `-gcm`:
Query:
``` sql
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
Example with `-gcm` mode and with `aad`:
Query:
``` sql
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
## aes_encrypt_mysql {#aes_encrypt_mysql}
Compatible with MySQL encryption; the resulting ciphertext can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Supported encryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
```sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```
**Parameters**
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
│ aes-256-cfb128 │ │
│ aes-256-cfb128 │ 7FB039F7 │
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
└────────────────┴────────────────────────────────────────────────────────────┘
```
## decrypt {#decrypt}
This function decrypts data using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
**Syntax**
```sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```
**Parameters**
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. Decryption fails if this value is incorrect. Works only in `-gcm` modes; for others it would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
```
Result:
```text
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
│ aes-128-ecb │ │
│ aes-128-ecb │ text │
│ aes-128-ecb │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────┘
```
## aes_decrypt_mysql {#aes_decrypt_mysql}
Compatible with MySQL encryption; decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
Supported decryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
```sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```
**Parameters**
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
│ aes-128-cbc │ │
│ aes-128-cbc │ text │
│ aes-128-cbc │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->

View File

@@ -306,3 +306,67 @@ execute_native_thread_routine
 start_thread
 clone
 ```
## tid {#tid}
Returns the id of the thread in which the current [Block](https://clickhouse.tech/docs/en/development/architecture/#block) is processed.
**Syntax**
``` sql
tid()
```
**Returned value**
- Current thread id. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Example**
Query:
``` sql
SELECT tid();
```
Result:
``` text
┌─tid()─┐
│ 3878 │
└───────┘
```
## logTrace {#logtrace}
Emits a trace log message to the server log for each [Block](https://clickhouse.tech/docs/en/development/architecture/#block).
**Syntax**
``` sql
logTrace('message')
```
**Parameters**
- `message` — Message that is emitted to the server log. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Always returns 0.
**Example**
Query:
``` sql
SELECT logTrace('logTrace message');
```
Result:
``` text
┌─logTrace('logTrace message')─┐
│ 0 │
└──────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) <!--hide-->

View File

@@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp
 Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above).

-For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
+For example:
+
+- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
+- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`.
+- `cutToFirstSignificantSubdomain('tr') = ''`.
+
+### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww}
+
+Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www".
+
+For example:
+
+- `cutToFirstSignificantSubdomainWithWWW('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
+- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`.
+- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`.
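A side-by-side query illustrating the difference between the two functions (the URL is hypothetical; the results are inferred from the rules above, not taken from the patch):

``` sql
SELECT
    cutToFirstSignificantSubdomain('https://www.example.com.tr/')        AS without_www, -- 'example.com.tr'
    cutToFirstSignificantSubdomainWithWWW('https://www.example.com.tr/') AS with_www;    -- 'www.example.com.tr'
```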
 ### port(URL\[, default_port = 0\]) {#port}

View File

@@ -20,7 +20,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
 PRIMARY KEY key1, key2
 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
 LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```

 External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
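For reference, a minimal sketch of a dictionary definition using the updated `LIFETIME` grammar (the table `colors_source` and its columns are invented for illustration):

``` sql
CREATE DICTIONARY colors
(
    id UInt64,
    name String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'colors_source'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 3600);  -- range form; the single-value form is LIFETIME(3600)
```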

View File

@@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
 ### Drawbacks {#drawbacks}

-Queries that use `FINAL` are executed not as fast as similar queries that don’t, because:
+Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:

-- Query is executed in a single thread and data is merged during query execution.
+- Data is merged during query execution.
 - Queries with `FINAL` read primary key columns in addition to the columns specified in the query.

 **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##}
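A sketch of the aggregation approach recommended above (the table and columns are hypothetical):

``` sql
-- Instead of: SELECT * FROM events FINAL
-- keep only the latest row per key at query time:
SELECT key, argMax(value, version) AS value
FROM events
GROUP BY key;
```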

View File

@@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
 PRIMARY KEY key1, key2
 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
 LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```

 Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

View File

@@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
 PRIMARY KEY key1, key2
 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
 LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```

 Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

View File

@@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
 PRIMARY KEY key1, key2
 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
 LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```

 Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

View File

@@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
 PRIMARY KEY key1, key2
 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
 LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```

 Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

View File

@@ -2187,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
 Default value: `1`.

+## output_format_tsv_null_representation {#output_format_tsv_null_representation}
+
+Allows configurable `NULL` representation for the [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls the output format; `\N` is the only supported `NULL` representation for the TSV input format.
+
+Default value: `\N`.
+
 [Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

View File

@@ -0,0 +1,70 @@
# system.replicated_fetches {#system_tables-replicated_fetches}
Contains information about currently running background fetches of data parts from other replicas.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the part fetch started.
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the part being fetched.
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part being fetched.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size (in bytes) of the compressed data in the part being fetched.
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the part being fetched.
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicating whether the `TO DETACHED` expression is used in the currently running background fetch.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
**Example**
``` sql
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
database: default
table: t
elapsed: 7.243039876
progress: 0.41832135995612835
result_part_name: all_0_0_0
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
partition_id: all
total_size_bytes_compressed: 1052783726
bytes_read_compressed: 440401920
source_replica_path: /clickhouse/test/t/replicas/1
source_replica_hostname: node1
source_replica_port: 9009
interserver_scheme: http
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
to_detached: 0
thread_id: 54
```
**See Also**
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->

View File

@@ -1157,6 +1157,7 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res
 ┌─res──────────┐
 │ [1, 2, 3, 4] │
 └──────────────┘
+```

 ## arrayAUC {#arrayauc}

View File

@@ -0,0 +1,382 @@
---
toc_priority: 67
toc_title: Encryption
---
# Encryption functions {#encryption-functions}
These functions implement encryption and decryption of data with the AES (Advanced Encryption Standard) algorithm.
Key length depends on the encryption mode. It is 16, 24, and 32 bytes long for `-128-`, `-192-`, and `-256-` modes respectively.
Initialization vector length is always 16 bytes (excess bytes are ignored).
Note that these functions work slowly.
## encrypt {#encrypt}
The function supports encryption of data with the following modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
**Syntax**
``` sql
encrypt('mode', 'plaintext', 'key' [, iv, aad])
```
**Parameters**
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for others it would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
│ aes-256-ctr │ │
│ aes-256-ctr │ 7FB039F7 │
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
└─────────────┴───────────────────────────────────────────────┘
```
Example with `-gcm` mode:
Query:
``` sql
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
Example with `-gcm` mode and `aad`:
Query:
``` sql
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
## aes_encrypt_mysql {#aes_encrypt_mysql}
Compatible with MySQL encryption; the result can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
The function supports encryption of data with the following modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
```sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```
**Parameters**
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
│ aes-256-cfb128 │ │
│ aes-256-cfb128 │ 7FB039F7 │
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
└────────────────┴────────────────────────────────────────────────────────────┘
```
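When `key` and `iv` have exactly the length the mode requires, `aes_encrypt_mysql` is expected to produce the same bytes as the generic `encrypt` for the modes they share; a minimal sketch comparing the two (assuming the exact-length `key32` and `iv` defaults from the table above):
``` sql
SELECT hex(encrypt('aes-256-ofb', input, key32, iv)) AS generic,
       hex(aes_encrypt_mysql('aes-256-ofb', input, key32, iv)) AS mysql_compatible
FROM encryption_test;
```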
## decrypt {#decrypt}
The function supports decryption of data in the following modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
**Syntax**
```sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```
**Parameters**
- `mode` — decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — encrypted text to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — initialization vector. Required for `-gcm` modes, optional for the others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — additional authenticated data. The text will not be decrypted if this value is incorrect. Works only in `-gcm` modes; the other modes throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Decrypted string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
```
Result:
```text
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
│ aes-128-ecb │ │
│ aes-128-ecb │ text │
│ aes-128-ecb │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────┘
```
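The hex strings printed by the `encrypt` examples can be fed back through `unhex`; a minimal sketch that decodes the `aes-256-ctr` ciphertext of the 'text' row shown earlier:
``` sql
SELECT decrypt('aes-256-ctr', unhex('7FB039F7'), key32, iv) AS plaintext
FROM encryption_test
LIMIT 1;
```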
## aes_decrypt_mysql {#aes_decrypt_mysql}
Compatible with MySQL encryption: decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
The function supports decryption of data in the following modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
```sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```
**Parameters**
- `mode` — decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — encrypted text to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Decrypted string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
│ aes-128-cbc │ │
│ aes-128-cbc │ text │
│ aes-128-cbc │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) <!--hide-->

View File

@ -306,3 +306,68 @@ execute_native_thread_routine
start_thread start_thread
clone clone
``` ```
## tid {#tid}
Returns the id of the thread in which the current [Block](https://clickhouse.tech/docs/ru/development/architecture/#block) is processed.
**Syntax**
``` sql
tid()
```
**Returned value**
- Id of the current thread. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Example**
Query:
``` sql
SELECT tid();
```
Result:
``` text
┌─tid()─┐
│ 3878 │
└───────┘
```
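Since the id is reported per thread, a multithreaded query returns several values; a minimal sketch (the number of distinct ids depends on `max_threads` and the hardware):
``` sql
SELECT DISTINCT tid() FROM numbers_mt(1000000);
```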
## logTrace {#logtrace}
Writes a message to the server log for each [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
**Syntax**
``` sql
logTrace('message')
```
**Parameters**
- `message` — the message to be written to the server log. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Always returns 0.
**Example**
Query:
``` sql
SELECT logTrace('logTrace message');
```
Result:
``` text
┌─logTrace('logTrace message')─┐
│ 0 │
└──────────────────────────────┘
```
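The message is written at the `trace` level, so it only shows up if the server log level includes `trace`; a minimal sketch that emits one message per processed block while discarding the query output:
``` sql
SELECT ignore(logTrace('logTrace demo')) FROM numbers(10) FORMAT Null;
```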
[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) <!--hide-->

View File

@ -16,7 +16,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2 PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value])) LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2) LIFETIME({MIN min_val MAX max_val | max_val})
``` ```
Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
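For reference, a minimal sketch of the two accepted forms of the updated `LIFETIME` clause (the dictionary and source names are placeholders):
``` sql
CREATE DICTIONARY test_dict (id UInt64, value String)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_source'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360); -- range form; LIFETIME(360) is the single-value form
```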

View File

@ -27,9 +27,9 @@ toc_title: FROM
### Недостатки {#drawbacks} ### Недостатки {#drawbacks}
Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что: Запросы, которые используют `FINAL` выполняются немного медленее, чем аналогичные запросы без него, потому что:
- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса. - Данные мёржатся во время выполнения запроса.
- Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе. - Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе.
**В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##} **В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##}

View File

@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.20 mkdocs-macros-plugin==0.4.20
nltk==3.5 nltk==3.5
nose==1.3.7 nose==1.3.7
protobuf==3.13.0 protobuf==3.14.0
numpy==1.19.2 numpy==1.19.2
Pygments==2.5.2 Pygments==2.5.2
pymdown-extensions==8.0 pymdown-extensions==8.0

View File

@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2 PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value])) LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2) LIFETIME({MIN min_val MAX max_val | max_val})
``` ```
Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

View File

@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A"
ClickHouse体验还有如下 ClickHouse体验还有如下
[ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse) [ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse)
实例托管 [Yandex云](https://cloud.yandex.com/). 实例托管 [Yandex云](https://cloud.yandex.com/)
更多信息 [云提供商](../commercial/cloud.md). 更多信息 [云提供商](../commercial/cloud.md)
ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的. ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的.
体验平台后端只是一个ClickHouse集群没有任何额外的服务器端应用程序。 体验平台后端只是一个ClickHouse集群没有任何额外的服务器端应用程序。
体验平台也同样提供了ClickHouse HTTPS服务端口。 体验平台也同样提供了ClickHouse HTTPS服务端口。
您可以使用任何HTTP客户端向体验平台进行查询例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机 您可以使用任何HTTP客户端向体验平台进行查询例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。
有关支持ClickHouse的软件产品的更多信息请访问 [这里](../interfaces/index.md). 有关支持ClickHouse的软件产品的更多信息请访问 [这里](../interfaces/index.md)
| 参数 | 值 | | 参数 | 值 |
|:---------|:--------------------------------------| |:---------|:--------------------------------------|

View File

@ -33,10 +33,10 @@ ClickHouse 收集的指标项:
- 服务用于计算的资源占用的各种指标。 - 服务用于计算的资源占用的各种指标。
- 关于查询处理的常见统计信息。 - 关于查询处理的常见统计信息。
可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) [系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 可以在[系统指标](system-tables/metrics.md#system_tables-metrics)[系统事件](system-tables/events.md#system_tables-events)以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)等系统表查看所有的指标项。
可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 可以配置ClickHouse向[Graphite](https://github.com/graphite-project)推送监控信息并导入指标。参考[Graphite监控](server-configuration-parameters/settings.md#server_configuration_parameters-graphite)配置文件。在配置指标导出之前,需要参考[Graphite官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建Graphite服务。
此外您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping` 如果服务器可用,它将以 `200 OK` 响应。 此外您可以通过HTTP API监视服务器可用性。将HTTP GET请求发送到`/ping`。如果服务器可用,它将以 `200 OK` 响应。
要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回`200 OK`。 如果副本滞后,请求将返回`503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。

View File

@ -259,5 +259,5 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2 PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value])) LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2) LIFETIME({MIN min_val MAX max_val | max_val})
``` ```

View File

@ -1,3 +1,5 @@
OWNER(g:clickhouse)
PROGRAM(clickhouse-server) PROGRAM(clickhouse-server)
PEERDIR( PEERDIR(

View File

@ -1,3 +1,5 @@
OWNER(g:clickhouse)
PROGRAM(clickhouse) PROGRAM(clickhouse)
CFLAGS( CFLAGS(

View File

@ -1,4 +1,6 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
OWNER(g:clickhouse)
LIBRARY() LIBRARY()
PEERDIR( PEERDIR(

View File

@ -1,3 +1,5 @@
OWNER(g:clickhouse)
LIBRARY() LIBRARY()
PEERDIR( PEERDIR(

View File

@ -8,7 +8,7 @@ namespace DB
{ {
AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter( AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter(
const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const
{ {
return std::make_shared<AggregateFunctionCountNotNullUnary>(types[0], params); return std::make_shared<AggregateFunctionCountNotNullUnary>(types[0], params);
} }

View File

@ -69,7 +69,7 @@ public:
} }
AggregateFunctionPtr getOwnNullAdapter( AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const override; const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const override;
}; };

View File

@ -1,6 +1,7 @@
#include <AggregateFunctions/AggregateFunctionIf.h> #include <AggregateFunctions/AggregateFunctionIf.h>
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h> #include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
#include "registerAggregateFunctions.h" #include "registerAggregateFunctions.h"
#include "AggregateFunctionNull.h"
namespace DB namespace DB
@ -8,6 +9,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
} }
@ -40,6 +42,164 @@ public:
} }
}; };
/** There are two cases: for single argument and variadic.
* Code for single argument is much more efficient.
*/
template <bool result_is_nullable, bool serialize_flag>
class AggregateFunctionIfNullUnary final
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>
{
private:
size_t num_arguments;
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>;
public:
String getName() const override
{
return Base::getName();
}
AggregateFunctionIfNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: Base(std::move(nested_function_), arguments, params), num_arguments(arguments.size())
{
if (num_arguments == 0)
throw Exception("Aggregate function " + getName() + " require at least one argument",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
{
const IColumn * filter_column = columns[num_arguments - 1];
if (const ColumnNullable * nullable_column = typeid_cast<const ColumnNullable *>(filter_column))
filter_column = nullable_column->getNestedColumnPtr().get();
return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num];
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
if (!column->isNullAt(row_num) && singleFilter(columns, row_num, num_arguments))
{
this->setFlag(place);
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
};
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
class AggregateFunctionIfNullVariadic final
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>
{
public:
String getName() const override
{
return Base::getName();
}
AggregateFunctionIfNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: Base(std::move(nested_function_), arguments, params), number_of_arguments(arguments.size())
{
if (number_of_arguments == 1)
throw Exception("Logical error: single argument is passed to AggregateFunctionIfNullVariadic", ErrorCodes::LOGICAL_ERROR);
if (number_of_arguments > MAX_ARGS)
throw Exception("Maximum number of arguments for aggregate function with Nullable types is " + toString(size_t(MAX_ARGS)),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (size_t i = 0; i < number_of_arguments; ++i)
is_nullable[i] = arguments[i]->isNullable();
}
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
{
return assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num];
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
/// This container stores the columns we really pass to the nested function.
const IColumn * nested_columns[number_of_arguments];
for (size_t i = 0; i < number_of_arguments; ++i)
{
if (is_nullable[i])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
if (null_is_skipped && nullable_col.isNullAt(row_num))
{
/// If at least one column has a null value in the current row,
/// we don't process this row.
return;
}
nested_columns[i] = &nullable_col.getNestedColumn();
}
else
nested_columns[i] = columns[i];
}
if (singleFilter(nested_columns, row_num, number_of_arguments))
{
this->setFlag(place);
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
}
}
private:
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>;
enum { MAX_ARGS = 8 };
size_t number_of_arguments = 0;
std::array<char, MAX_ARGS> is_nullable; /// Plain array is better than std::vector due to one indirection less.
};
AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter(
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
const Array & params, const AggregateFunctionProperties & properties) const
{
bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable();
size_t nullable_size = std::count_if(arguments.begin(), arguments.end(), [](const auto & element) { return element->isNullable(); });
return_type_is_nullable &= nullable_size != 1 || !arguments.back()->isNullable(); /// If only condition is nullable. we should non-nullable type.
bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null;
if (arguments.size() <= 2 && arguments.front()->isNullable())
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionIfNullUnary<true, true>>(nested_func, arguments, params);
}
else
{
if (serialize_flag)
return std::make_shared<AggregateFunctionIfNullUnary<false, true>>(nested_func, arguments, params);
else
return std::make_shared<AggregateFunctionIfNullUnary<false, false>>(nested_func, arguments, params);
}
}
else
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionIfNullVariadic<true, true, true>>(nested_function, arguments, params);
}
else
{
if (serialize_flag)
return std::make_shared<AggregateFunctionIfNullVariadic<false, true, true>>(nested_function, arguments, params);
else
return std::make_shared<AggregateFunctionIfNullVariadic<false, false, true>>(nested_function, arguments, params);
}
}
}
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory & factory) void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory & factory)
{ {
factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorIf>()); factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorIf>());

View File

@ -109,6 +109,10 @@ public:
{ {
return nested_func->isState(); return nested_func->isState();
} }
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
const Array & params, const AggregateFunctionProperties & properties) const override;
}; };
} }

View File

@ -143,7 +143,7 @@ void LinearModelData::updateState()
void LinearModelData::predict( void LinearModelData::predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const Context & context) const const Context & context) const
@ -264,8 +264,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac; average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac;
average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac; average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac;
} }
beta1_powered_ *= adam_rhs.beta1_powered_; beta1_powered *= adam_rhs.beta1_powered;
beta2_powered_ *= adam_rhs.beta2_powered_; beta2_powered *= adam_rhs.beta2_powered;
} }
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
@ -282,21 +282,21 @@ void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & b
for (size_t i = 0; i != average_gradient.size(); ++i) for (size_t i = 0; i != average_gradient.size(); ++i)
{ {
Float64 normed_gradient = batch_gradient[i] / batch_size; Float64 normed_gradient = batch_gradient[i] / batch_size;
average_gradient[i] = beta1_ * average_gradient[i] + (1 - beta1_) * normed_gradient; average_gradient[i] = beta1 * average_gradient[i] + (1 - beta1) * normed_gradient;
average_squared_gradient[i] = beta2_ * average_squared_gradient[i] + average_squared_gradient[i] = beta2 * average_squared_gradient[i] +
(1 - beta2_) * normed_gradient * normed_gradient; (1 - beta2) * normed_gradient * normed_gradient;
} }
for (size_t i = 0; i < weights.size(); ++i) for (size_t i = 0; i < weights.size(); ++i)
{ {
weights[i] += (learning_rate * average_gradient[i]) / weights[i] += (learning_rate * average_gradient[i]) /
((1 - beta1_powered_) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered_)) + eps_)); ((1 - beta1_powered) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered)) + eps));
} }
bias += (learning_rate * average_gradient[weights.size()]) / bias += (learning_rate * average_gradient[weights.size()]) /
((1 - beta1_powered_) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered_)) + eps_)); ((1 - beta1_powered) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered)) + eps));
beta1_powered_ *= beta1_; beta1_powered *= beta1;
beta2_powered_ *= beta2_; beta2_powered *= beta2;
} }
void Adam::addToBatch( void Adam::addToBatch(
@ -348,7 +348,7 @@ void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64
for (size_t i = 0; i < batch_gradient.size(); ++i) for (size_t i = 0; i < batch_gradient.size(); ++i)
{ {
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size; accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size;
} }
for (size_t i = 0; i < weights.size(); ++i) for (size_t i = 0; i < weights.size(); ++i)
{ {
@ -375,9 +375,9 @@ void Nesterov::addToBatch(
std::vector<Float64> shifted_weights(weights.size()); std::vector<Float64> shifted_weights(weights.size());
for (size_t i = 0; i != shifted_weights.size(); ++i) for (size_t i = 0; i != shifted_weights.size(); ++i)
{ {
shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_; shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha;
} }
auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_; auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha;
gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num); gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num);
} }
@ -411,7 +411,7 @@ void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64
for (size_t i = 0; i < batch_gradient.size(); ++i) for (size_t i = 0; i < batch_gradient.size(); ++i)
{ {
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size; accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size;
} }
for (size_t i = 0; i < weights.size(); ++i) for (size_t i = 0; i < weights.size(); ++i)
{ {
@ -448,7 +448,7 @@ void IWeightsUpdater::addToBatch(
void LogisticRegression::predict( void LogisticRegression::predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const std::vector<Float64> & weights, const std::vector<Float64> & weights,
@ -516,7 +516,7 @@ void LogisticRegression::compute(
void LinearRegression::predict( void LinearRegression::predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const std::vector<Float64> & weights, const std::vector<Float64> & weights,

View File

@ -23,7 +23,7 @@ GradientComputer class computes gradient according to its loss function
class IGradientComputer class IGradientComputer
{ {
public: public:
IGradientComputer() {} IGradientComputer() = default;
virtual ~IGradientComputer() = default; virtual ~IGradientComputer() = default;
@ -39,7 +39,7 @@ public:
virtual void predict( virtual void predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const std::vector<Float64> & weights, const std::vector<Float64> & weights,
@ -51,7 +51,7 @@ public:
class LinearRegression : public IGradientComputer class LinearRegression : public IGradientComputer
{ {
public: public:
LinearRegression() {} LinearRegression() = default;
void compute( void compute(
std::vector<Float64> & batch_gradient, std::vector<Float64> & batch_gradient,
@ -64,7 +64,7 @@ public:
void predict( void predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const std::vector<Float64> & weights, const std::vector<Float64> & weights,
@ -76,7 +76,7 @@ public:
class LogisticRegression : public IGradientComputer class LogisticRegression : public IGradientComputer
{ {
public: public:
LogisticRegression() {} LogisticRegression() = default;
void compute( void compute(
std::vector<Float64> & batch_gradient, std::vector<Float64> & batch_gradient,
@ -89,7 +89,7 @@ public:
void predict( void predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const std::vector<Float64> & weights, const std::vector<Float64> & weights,
@ -147,9 +147,9 @@ public:
class Momentum : public IWeightsUpdater class Momentum : public IWeightsUpdater
{ {
public: public:
Momentum() {} Momentum() = default;
Momentum(Float64 alpha) : alpha_(alpha) {} explicit Momentum(Float64 alpha_) : alpha(alpha_) {}
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override; void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
@ -160,7 +160,7 @@ public:
void read(ReadBuffer & buf) override; void read(ReadBuffer & buf) override;
private: private:
Float64 alpha_{0.1}; Float64 alpha{0.1};
std::vector<Float64> accumulated_gradient; std::vector<Float64> accumulated_gradient;
}; };
@ -168,9 +168,9 @@ private:
class Nesterov : public IWeightsUpdater class Nesterov : public IWeightsUpdater
{ {
public: public:
Nesterov() {} Nesterov() = default;
Nesterov(Float64 alpha) : alpha_(alpha) {} explicit Nesterov(Float64 alpha_) : alpha(alpha_) {}
void addToBatch( void addToBatch(
std::vector<Float64> & batch_gradient, std::vector<Float64> & batch_gradient,
@ -191,7 +191,7 @@ public:
void read(ReadBuffer & buf) override; void read(ReadBuffer & buf) override;
private: private:
const Float64 alpha_ = 0.9; const Float64 alpha = 0.9;
std::vector<Float64> accumulated_gradient; std::vector<Float64> accumulated_gradient;
}; };
@ -201,8 +201,8 @@ class Adam : public IWeightsUpdater
public: public:
Adam() Adam()
{ {
beta1_powered_ = beta1_; beta1_powered = beta1;
beta2_powered_ = beta2_; beta2_powered = beta2;
} }
void addToBatch( void addToBatch(
@ -225,11 +225,11 @@ public:
private: private:
/// beta1 and beta2 hyperparameters have such recommended values /// beta1 and beta2 hyperparameters have such recommended values
const Float64 beta1_ = 0.9; const Float64 beta1 = 0.9;
const Float64 beta2_ = 0.999; const Float64 beta2 = 0.999;
const Float64 eps_ = 0.000001; const Float64 eps = 0.000001;
Float64 beta1_powered_; Float64 beta1_powered;
Float64 beta2_powered_; Float64 beta2_powered;
std::vector<Float64> average_gradient; std::vector<Float64> average_gradient;
std::vector<Float64> average_squared_gradient; std::vector<Float64> average_squared_gradient;
@ -241,7 +241,7 @@ private:
class LinearModelData class LinearModelData
{ {
public: public:
LinearModelData() {} LinearModelData() = default;
LinearModelData( LinearModelData(
Float64 learning_rate_, Float64 learning_rate_,
@ -261,7 +261,7 @@ public:
void predict( void predict(
ColumnVector<Float64>::Container & container, ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const Context & context) const; const Context & context) const;
@ -360,7 +360,7 @@ public:
void predictValues( void predictValues(
ConstAggregateDataPtr place, ConstAggregateDataPtr place,
IColumn & to, IColumn & to,
ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t offset, size_t offset,
size_t limit, size_t limit,
const Context & context) const override const Context & context) const override

View File

@ -72,7 +72,7 @@ public:
assert(nested_function); assert(nested_function);
if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params)) if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params, properties))
return adapter; return adapter;
/// If applied to aggregate function with -State combinator, we apply -Null combinator to it's nested_function instead of itself. /// If applied to aggregate function with -State combinator, we apply -Null combinator to it's nested_function instead of itself.

View File

@ -239,7 +239,8 @@ public:
} }
AggregateFunctionPtr getOwnNullAdapter( AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params,
const AggregateFunctionProperties & /*properties*/) const override
{ {
return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params); return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params);
} }

View File

@ -33,6 +33,7 @@ using ConstAggregateDataPtr = const char *;
class IAggregateFunction; class IAggregateFunction;
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>; using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
struct AggregateFunctionProperties;
/** Aggregate functions interface. /** Aggregate functions interface.
* Instances of classes with this interface do not contain the data itself for aggregation, * Instances of classes with this interface do not contain the data itself for aggregation,
@ -60,7 +61,7 @@ public:
throw Exception("Prediction is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); throw Exception("Prediction is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
} }
virtual ~IAggregateFunction() {} virtual ~IAggregateFunction() = default;
/** Data manipulating functions. */ /** Data manipulating functions. */
@ -113,7 +114,7 @@ public:
virtual void predictValues( virtual void predictValues(
ConstAggregateDataPtr /* place */, ConstAggregateDataPtr /* place */,
IColumn & /*to*/, IColumn & /*to*/,
ColumnsWithTypeAndName & /*arguments*/, const ColumnsWithTypeAndName & /*arguments*/,
size_t /*offset*/, size_t /*offset*/,
size_t /*limit*/, size_t /*limit*/,
const Context & /*context*/) const const Context & /*context*/) const
@ -185,7 +186,8 @@ public:
* arguments and params are for nested_function. * arguments and params are for nested_function.
*/ */
virtual AggregateFunctionPtr getOwnNullAdapter( virtual AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, const Array & /*params*/) const const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/,
const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const
{ {
return nullptr; return nullptr;
} }

View File

@ -14,6 +14,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TOO_LARGE_ARRAY_SIZE; extern const int TOO_LARGE_ARRAY_SIZE;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
} }
@ -36,10 +37,11 @@ namespace ErrorCodes
* uses asin, which slows down the algorithm a bit. * uses asin, which slows down the algorithm a bit.
*/ */
template <typename T> template <typename T>
class TDigest class QuantileTDigest
{ {
using Value = Float32; using Value = Float32;
using Count = Float32; using Count = Float32;
using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision than Count.
/** The centroid stores the weight of points around their mean value /** The centroid stores the weight of points around their mean value
*/ */
@ -55,13 +57,6 @@ class TDigest
, count(count_) , count(count_)
{} {}
Centroid & operator+=(const Centroid & other)
{
count += other.count;
mean += other.count * (other.mean - mean) / count;
return *this;
}
bool operator<(const Centroid & other) const bool operator<(const Centroid & other) const
{ {
return mean < other.mean; return mean < other.mean;
@ -71,26 +66,42 @@ class TDigest
/** :param epsilon: value \delta from the article - error in the range /** :param epsilon: value \delta from the article - error in the range
* quantile 0.5 (default is 0.01, i.e. 1%) * quantile 0.5 (default is 0.01, i.e. 1%)
* if you change epsilon, you must also change max_centroids
* :param max_centroids: depends on epsilon, the better accuracy, the more centroids you need
* to describe data with this accuracy. Read article before changing.
* :param max_unmerged: when accumulating count of new points beyond this * :param max_unmerged: when accumulating count of new points beyond this
* value centroid compression is triggered * value centroid compression is triggered
* (default is 2048, the higher the value - the * (default is 2048, the higher the value - the
* more memory is required, but amortization of execution time increases) * more memory is required, but amortization of execution time increases)
* Change freely anytime.
*/ */
struct Params struct Params
{ {
Value epsilon = 0.01; Value epsilon = 0.01;
size_t max_centroids = 2048;
size_t max_unmerged = 2048; size_t max_unmerged = 2048;
}; };
/** max_centroids_deserialize should be >= all max_centroids ever used in production.
 * This is a security parameter preventing allocation of too many centroids in deserialize(), so it can be relatively large.
*/
static constexpr size_t max_centroids_deserialize = 65536;
Params params; static constexpr Params params{};
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(BetterFloat) - sizeof(size_t); // If alignment is imperfect, sizeof(TDigest) will be more than naively expected
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>; using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
Centroids centroids; Centroids centroids;
Count count = 0; BetterFloat count = 0;
UInt32 unmerged = 0; size_t unmerged = 0;
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
*/
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
{
double k = (x - x1) / (x2 - x1);
return y1 + k * (y2 - y1);
}
struct RadixSortTraits struct RadixSortTraits
{ {
@ -111,15 +122,56 @@ class TDigest
}; };
/** Adds a centroid `c` to the digest /** Adds a centroid `c` to the digest
* centroid must be valid, validity is checked in add(), deserialize() and is maintained by compress()
*/ */
void addCentroid(const Centroid & c) void addCentroid(const Centroid & c)
{ {
centroids.push_back(c); centroids.push_back(c);
count += c.count; count += c.count;
++unmerged; ++unmerged;
if (unmerged >= params.max_unmerged) if (unmerged > params.max_unmerged)
compress(); compress();
} }
void compressBrute()
{
if (centroids.size() <= params.max_centroids)
return;
const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2
auto l = centroids.begin();
auto r = std::next(l);
BetterFloat sum = 0;
BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability
BetterFloat l_count = l->count;
size_t batch_pos = 0;
for (;r != centroids.end(); ++r)
{
if (batch_pos < batch_size - 1)
{
/// The left column "eats" the right. Middle of the batch
l_count += r->count;
l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower
l->mean = l_mean;
l->count = l_count;
batch_pos += 1;
}
else
{
// End of the batch, start the next one
sum += l->count; // Not l_count, otherwise actual sum of elements will be different
++l;
/// We skip all the values "eaten" earlier.
*l = *r;
l_mean = l->mean;
l_count = l->count;
batch_pos = 0;
}
}
count = sum + l_count; // Update count, it might be different due to += inaccuracy
centroids.resize(l - centroids.begin() + 1);
// Here centroids.size() <= params.max_centroids
}
public: public:
/** Performs compression of accumulated centroids /** Performs compression of accumulated centroids
@ -128,74 +180,92 @@ public:
*/ */
void compress() void compress()
{ {
if (unmerged > 0) if (unmerged > 0 || centroids.size() > params.max_centroids)
{ {
// unmerged > 0 implies centroids.size() > 0, hence *l is valid below
RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size()); RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size());
if (centroids.size() > 3) /// A pair of consecutive bars of the histogram.
auto l = centroids.begin();
auto r = std::next(l);
const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization
BetterFloat sum = 0;
BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability
BetterFloat l_count = l->count;
while (r != centroids.end())
{ {
/// A pair of consecutive bars of the histogram. if (l->mean == r->mean) // Perfect aggregation (fast). We compare l->mean, not l_mean, to avoid identical elements after compress
auto l = centroids.begin();
auto r = std::next(l);
Count sum = 0;
while (r != centroids.end())
{ {
// we use quantile which gives us the smallest error l_count += r->count;
l->count = l_count;
/// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l.
Value ql = (sum + l->count * 0.5) / count;
Value err = ql * (1 - ql);
/// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r.
Value qr = (sum + l->count + r->count * 0.5) / count;
Value err2 = qr * (1 - qr);
if (err > err2)
err = err2;
Value k = 4 * count * err * params.epsilon;
/** The ratio of the weight of the glued column pair to all values is not greater,
* than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2),
* and at the edges decreases and is approximately equal to the distance to the edge * 4.
*/
if (l->count + r->count <= k)
{
// it is possible to merge left and right
/// The left column "eats" the right.
*l += *r;
}
else
{
// not enough capacity, check the next pair
sum += l->count;
++l;
/// We skip all the values "eaten" earlier.
if (l != r)
*l = *r;
}
++r; ++r;
continue;
} }
// we use quantile which gives us the smallest error
/// At the end of the loop, all values to the right of l were "eaten". /// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l.
centroids.resize(l - centroids.begin() + 1); BetterFloat ql = (sum + l_count * 0.5) / count;
BetterFloat err = ql * (1 - ql);
/// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r.
BetterFloat qr = (sum + l_count + r->count * 0.5) / count;
BetterFloat err2 = qr * (1 - qr);
if (err > err2)
err = err2;
BetterFloat k = count_epsilon_4 * err;
/** The ratio of the weight of the glued column pair to all values is not greater,
* than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2),
* and at the edges decreases and is approximately equal to the distance to the edge * 4.
*/
if (l_count + r->count <= k)
{
// it is possible to merge left and right
/// The left column "eats" the right.
l_count += r->count;
l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower
l->mean = l_mean;
l->count = l_count;
}
else
{
// not enough capacity, check the next pair
sum += l->count; // Not l_count, otherwise actual sum of elements will be different
++l;
/// We skip all the values "eaten" earlier.
if (l != r)
*l = *r;
l_mean = l->mean;
l_count = l->count;
}
++r;
} }
count = sum + l_count; // Update count, it might be different due to += inaccuracy
/// At the end of the loop, all values to the right of l were "eaten".
centroids.resize(l - centroids.begin() + 1);
unmerged = 0; unmerged = 0;
} }
// Ensures centroids.size() < max_centroids, independent of unprovable floating point blackbox above
compressBrute();
} }
/** Adds to the digest a change in `x` with a weight of `cnt` (default 1) /** Adds to the digest a change in `x` with a weight of `cnt` (default 1)
*/ */
void add(T x, UInt64 cnt = 1) void add(T x, UInt64 cnt = 1)
{ {
addCentroid(Centroid(Value(x), Count(cnt))); auto vx = static_cast<Value>(x);
if (cnt == 0 || std::isnan(vx))
return; // Count 0 breaks compress() assumptions, Nan breaks sort(). We treat them as no sample.
addCentroid(Centroid{vx, static_cast<Count>(cnt)});
} }
void merge(const TDigest & other) void merge(const QuantileTDigest & other)
{ {
for (const auto & c : other.centroids) for (const auto & c : other.centroids)
addCentroid(c); addCentroid(c);
@ -213,89 +283,23 @@ public:
size_t size = 0; size_t size = 0;
readVarUInt(size, buf); readVarUInt(size, buf);
if (size > params.max_unmerged) if (size > max_centroids_deserialize)
throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE); throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
centroids.resize(size);
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
count = 0;
for (const auto & c : centroids)
count += c.count;
}
Count getCount()
{
return count;
}
const Centroids & getCentroids() const
{
return centroids;
}
void reset()
{
centroids.resize(0);
count = 0; count = 0;
unmerged = 0; unmerged = 0;
}
};
template <typename T> centroids.resize(size);
class QuantileTDigest // From now, TDigest will be in invalid state if exception is thrown.
{ buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
using Value = Float32;
using Count = Float32;
/** We store two t-digests. When an amount of elements in sub_tdigest become more than merge_threshold for (const auto & c : centroids)
* we merge sub_tdigest in main_tdigest and reset sub_tdigest. This method is needed to decrease an amount of {
* centroids in t-digest (experiments show that after merge_threshold the size of t-digest significantly grows, if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort()
* but merging two big t-digest decreases it). throw Exception("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
*/ count += c.count;
TDigest<T> main_tdigest; }
TDigest<T> sub_tdigest; compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
size_t merge_threshold = 1e7;
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
*/
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
{
double k = (x - x1) / (x2 - x1);
return y1 + k * (y2 - y1);
}
void mergeTDigests()
{
main_tdigest.merge(sub_tdigest);
sub_tdigest.reset();
}
public:
void add(T x, UInt64 cnt = 1)
{
if (sub_tdigest.getCount() >= merge_threshold)
mergeTDigests();
sub_tdigest.add(x, cnt);
}
void merge(const QuantileTDigest & other)
{
mergeTDigests();
main_tdigest.merge(other.main_tdigest);
main_tdigest.merge(other.sub_tdigest);
}
void serialize(WriteBuffer & buf)
{
mergeTDigests();
main_tdigest.serialize(buf);
}
void deserialize(ReadBuffer & buf)
{
sub_tdigest.reset();
main_tdigest.deserialize(buf);
} }
/** Calculates the quantile q [0, 1] based on the digest. /** Calculates the quantile q [0, 1] based on the digest.
@ -304,18 +308,15 @@ public:
template <typename ResultType> template <typename ResultType>
ResultType getImpl(Float64 level) ResultType getImpl(Float64 level)
{ {
mergeTDigests();
auto & centroids = main_tdigest.getCentroids();
if (centroids.empty()) if (centroids.empty())
return std::is_floating_point_v<ResultType> ? NAN : 0; return std::is_floating_point_v<ResultType> ? NAN : 0;
main_tdigest.compress(); compress();
if (centroids.size() == 1) if (centroids.size() == 1)
return centroids.front().mean; return centroids.front().mean;
Float64 x = level * main_tdigest.getCount(); Float64 x = level * count;
Float64 prev_x = 0; Float64 prev_x = 0;
Count sum = 0; Count sum = 0;
Value prev_mean = centroids.front().mean; Value prev_mean = centroids.front().mean;
@ -343,9 +344,6 @@ public:
template <typename ResultType> template <typename ResultType>
void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result) void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result)
{ {
mergeTDigests();
auto & centroids = main_tdigest.getCentroids();
if (centroids.empty()) if (centroids.empty())
{ {
for (size_t result_num = 0; result_num < size; ++result_num) for (size_t result_num = 0; result_num < size; ++result_num)
@ -353,7 +351,7 @@ public:
return; return;
} }
main_tdigest.compress(); compress();
if (centroids.size() == 1) if (centroids.size() == 1)
{ {
@ -362,7 +360,7 @@ public:
return; return;
} }
Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount(); Float64 x = levels[levels_permutation[0]] * count;
Float64 prev_x = 0; Float64 prev_x = 0;
Count sum = 0; Count sum = 0;
Value prev_mean = centroids.front().mean; Value prev_mean = centroids.front().mean;
@ -380,7 +378,7 @@ public:
if (result_num >= size) if (result_num >= size)
return; return;
x = levels[levels_permutation[result_num]] * main_tdigest.getCount(); x = levels[levels_permutation[result_num]] * count;
} }
sum += c.count; sum += c.count;
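For context, this digest backs the `quantileTDigest` family of aggregate functions; a minimal usage sketch:
``` sql
SELECT quantileTDigest(0.5)(number) FROM numbers(1000000);
```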

View File

@ -1,4 +1,6 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
OWNER(g:clickhouse)
LIBRARY() LIBRARY()
PEERDIR( PEERDIR(

View File

@ -1,3 +1,5 @@
OWNER(g:clickhouse)
LIBRARY() LIBRARY()
PEERDIR( PEERDIR(

View File

@ -1,4 +1,6 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
OWNER(g:clickhouse)
LIBRARY() LIBRARY()
PEERDIR( PEERDIR(

View File

@ -1,3 +1,5 @@
OWNER(g:clickhouse)
LIBRARY() LIBRARY()
PEERDIR( PEERDIR(

View File

@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
return res; return res;
} }
MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const MutableColumnPtr ColumnAggregateFunction::predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const
{ {
MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn(); MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn();
res->reserve(data.size()); res->reserve(data.size());

View File

@ -119,7 +119,7 @@ public:
const char * getFamilyName() const override { return "AggregateFunction"; } const char * getFamilyName() const override { return "AggregateFunction"; }
TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; } TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; }
MutableColumnPtr predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const; MutableColumnPtr predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const;
size_t size() const override size_t size() const override
{ {

View File

@ -138,4 +138,12 @@ void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
value = intHashCRC32(data_hash, value); value = intHashCRC32(data_hash, value);
} }
void ColumnConst::compareColumn(
const IColumn & rhs, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> & compare_results, int, int nan_direction_hint)
const
{
Int8 res = compareAt(1, 1, rhs, nan_direction_hint);
std::fill(compare_results.begin(), compare_results.end(), res);
}
} }

View File

@ -199,11 +199,7 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num, void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override int direction, int nan_direction_hint) const override;
{
return data->compareColumn(rhs, rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;

View File

@@ -55,32 +55,16 @@ void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num,
 template <typename T>
 StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
 {
-    if constexpr (is_POD)
-    {
-        auto * pos = arena.allocContinue(sizeof(T), begin);
-        memcpy(pos, &data[n], sizeof(T));
-        return StringRef(pos, sizeof(T));
-    }
-    else
-    {
-        char * pos = arena.allocContinue(BigInt<T>::size, begin);
-        return BigInt<Int256>::serialize(data[n], pos);
-    }
+    auto * pos = arena.allocContinue(sizeof(T), begin);
+    memcpy(pos, &data[n], sizeof(T));
+    return StringRef(pos, sizeof(T));
 }
 
 template <typename T>
 const char * ColumnDecimal<T>::deserializeAndInsertFromArena(const char * pos)
 {
-    if constexpr (is_POD)
-    {
-        data.push_back(unalignedLoad<T>(pos));
-        return pos + sizeof(T);
-    }
-    else
-    {
-        data.push_back(BigInt<Int256>::deserialize(pos));
-        return pos + BigInt<Int256>::size;
-    }
+    data.push_back(unalignedLoad<T>(pos));
+    return pos + sizeof(T);
 }
 
 template <typename T>
@@ -252,24 +236,13 @@ MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
         new_col.data.resize(size);
 
         size_t count = std::min(this->size(), size);
 
-        if constexpr (is_POD)
-        {
-            memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));
-            if (size > count)
-            {
-                void * tail = &new_col.data[count];
-                memset(tail, 0, (size - count) * sizeof(T));
-            }
-        }
-        else
-        {
-            for (size_t i = 0; i < count; i++)
-                new_col.data[i] = data[i];
-            if (size > count)
-                for (size_t i = count; i < size; i++)
-                    new_col.data[i] = T{};
-        }
+        memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));
+
+        if (size > count)
+        {
+            void * tail = &new_col.data[count];
+            memset(tail, 0, (size - count) * sizeof(T));
+        }
     }
@@ -279,16 +252,9 @@ MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
 template <typename T>
 void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
 {
-    if constexpr (is_POD)
-    {
-        T tmp;
-        memcpy(&tmp, src, sizeof(T));
-        data.emplace_back(tmp);
-    }
-    else
-    {
-        data.push_back(BigInt<Int256>::deserialize(src));
-    }
+    T tmp;
+    memcpy(&tmp, src, sizeof(T));
+    data.emplace_back(tmp);
 }
 
 template <typename T>
@@ -303,13 +269,8 @@ void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
     size_t old_size = data.size();
     data.resize(old_size + length);
 
-    if constexpr (is_POD)
-        memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
-    else
-    {
-        for (size_t i = 0; i < length; i++)
-            data[old_size + i] = src_vec.data[start + i];
-    }
+    memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
 }
 
 template <typename T>
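
All the is_POD branches above could be dropped because every decimal native type, including the wide 256-bit one, is now trivially copyable, so the raw-memory path is valid for all widths. A sketch of the assumption this rests on (illustration only, not ClickHouse code):

    #include <cstddef>
    #include <cstring>
    #include <type_traits>

    // The unified memcpy path is only legal for trivially copyable element types.
    template <typename T>
    void append_raw(T * dst, const T * src, std::size_t count)
    {
        static_assert(std::is_trivially_copyable_v<T>, "memcpy requires a trivially copyable T");
        memcpy(dst, src, count * sizeof(T));
    }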

View File

@@ -4,6 +4,7 @@
 #include <Columns/IColumn.h>
 #include <Columns/IColumnImpl.h>
 #include <Core/Field.h>
+#include <Core/DecimalFunctions.h>
 #include <Common/typeid_cast.h>
 #include <common/sort.h>
@@ -12,12 +13,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int NOT_IMPLEMENTED;
-}
-
 /// PaddedPODArray extended by Decimal scale
 template <typename T>
 class DecimalPaddedPODArray : public PaddedPODArray<T>
@@ -55,43 +50,6 @@ private:
     UInt32 scale;
 };
 
-/// std::vector extended by Decimal scale
-template <typename T>
-class DecimalVector : public std::vector<T>
-{
-public:
-    using Base = std::vector<T>;
-    using Base::operator[];
-
-    DecimalVector(size_t size, UInt32 scale_)
-        : Base(size),
-          scale(scale_)
-    {}
-
-    DecimalVector(const DecimalVector & other)
-        : Base(other.begin(), other.end()),
-          scale(other.scale)
-    {}
-
-    DecimalVector(DecimalVector && other)
-    {
-        this->swap(other);
-        std::swap(scale, other.scale);
-    }
-
-    DecimalVector & operator=(DecimalVector && other)
-    {
-        this->swap(other);
-        std::swap(scale, other.scale);
-        return *this;
-    }
-
-    UInt32 getScale() const { return scale; }
-
-private:
-    UInt32 scale;
-};
-
 /// A ColumnVector for Decimals
 template <typename T>
 class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>>
@@ -105,10 +63,7 @@ private:
 public:
     using ValueType = T;
     using NativeT = typename T::NativeType;
-    static constexpr bool is_POD = !is_big_int_v<NativeT>;
-    using Container = std::conditional_t<is_POD,
-        DecimalPaddedPODArray<T>,
-        DecimalVector<T>>;
+    using Container = DecimalPaddedPODArray<T>;
 
 private:
     ColumnDecimal(const size_t n, UInt32 scale_)
@@ -132,18 +87,8 @@ public:
     size_t size() const override { return data.size(); }
     size_t byteSize() const override { return data.size() * sizeof(data[0]); }
-    size_t allocatedBytes() const override
-    {
-        if constexpr (is_POD)
-            return data.allocated_bytes();
-        else
-            return data.capacity() * sizeof(data[0]);
-    }
-    void protect() override
-    {
-        if constexpr (is_POD)
-            data.protect();
-    }
+    size_t allocatedBytes() const override { return data.allocated_bytes(); }
+    void protect() override { data.protect(); }
     void reserve(size_t n) override { data.reserve(n); }
 
     void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
@@ -151,38 +96,28 @@ public:
     void insertDefault() override { data.push_back(T()); }
     virtual void insertManyDefaults(size_t length) override
     {
-        if constexpr (is_POD)
-            data.resize_fill(data.size() + length);
-        else
-            data.resize(data.size() + length);
+        data.resize_fill(data.size() + length);
     }
     void insert(const Field & x) override { data.push_back(DB::get<NearestFieldType<T>>(x)); }
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 
     void popBack(size_t n) override
     {
-        if constexpr (is_POD)
-            data.resize_assume_reserved(data.size() - n);
-        else
-            data.resize(data.size() - n);
+        data.resize_assume_reserved(data.size() - n);
     }
 
     StringRef getRawData() const override
     {
-        if constexpr (is_POD)
-            return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
-        else
-            throw Exception("getRawData() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
+        return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
     }
 
     StringRef getDataAt(size_t n) const override
     {
-        if constexpr (is_POD)
-            return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
-        else
-            throw Exception("getDataAt() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
+        return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
     }
 
+    Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo<Float64>(data[n], scale); }
+
     StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
     const char * deserializeAndInsertFromArena(const char * pos) override;
     void updateHashWithValue(size_t n, SipHash & hash) const override;
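
The new getFloat64 delegates to DecimalUtils::convertTo<Float64>(data[n], scale). A decimal stores an integer value plus a scale and denotes value / 10^scale; a rough stand-alone model of the conversion (illustration, not the real implementation):

    #include <cmath>
    #include <cstdint>

    // E.g. value = 12345 with scale = 2 denotes 123.45.
    double decimal_to_float64(int64_t value, uint32_t scale)
    {
        return static_cast<double>(value) / std::pow(10.0, scale);
    }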

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 ADDINCL(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 ADDINCL(

View File

@@ -519,9 +519,11 @@
     M(550, CONDITIONAL_TREE_PARENT_NOT_FOUND) \
     M(551, ILLEGAL_PROJECTION_MANIPULATOR) \
     M(552, UNRECOGNIZED_ARGUMENTS) \
-    M(553, ROCKSDB_ERROR) \
     M(553, LZMA_STREAM_ENCODER_FAILED) \
     M(554, LZMA_STREAM_DECODER_FAILED) \
+    M(555, ROCKSDB_ERROR) \
+    M(556, SYNC_MYSQL_USER_ACCESS_ERROR)\
+    \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
     M(1001, STD_EXCEPTION) \

View File

@@ -11,6 +11,7 @@
 #include <cstdint>
 #include <cassert>
 #include <type_traits>
+#include <memory>
 
 #include <ext/bit_cast.h>
 #include <common/extended_types.h>

View File

@@ -511,19 +511,30 @@ void TestKeeper::processingThread()
                 if (expired)
                     break;
 
-                if (info.watch)
-                {
-                    auto & watches_type = dynamic_cast<const ListRequest *>(info.request.get())
-                        ? list_watches
-                        : watches;
-
-                    watches_type[info.request->getPath()].emplace_back(std::move(info.watch));
-                }
-
                 ++zxid;
 
                 info.request->addRootPath(root_path);
                 auto [response, _] = info.request->process(container, zxid);
+
+                if (info.watch)
+                {
+                    /// To be compatible with real ZooKeeper we add watch if request was successful (i.e. node exists)
+                    /// or if it was exists request which allows to add watches for non existing nodes.
+                    if (response->error == Error::ZOK)
+                    {
+                        auto & watches_type = dynamic_cast<const ListRequest *>(info.request.get())
+                            ? list_watches
+                            : watches;
+
+                        watches_type[info.request->getPath()].emplace_back(std::move(info.watch));
+                    }
+                    else if (response->error == Error::ZNONODE && dynamic_cast<const ExistsRequest *>(info.request.get()))
+                    {
+                        watches[info.request->getPath()].emplace_back(std::move(info.watch));
+                    }
+                }
+
                 if (response->error == Error::ZOK)
                     info.request->processWatches(watches, list_watches);
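
The reordering registers the watch only after the request has been processed, which is what makes the ZooKeeper-compatible rule expressible: watch on success, or on ZNONODE if the request was an exists call. The decision rule in isolation (condensed model, not the real TestKeeper types):

    enum class Error { ZOK, ZNONODE, Other };

    // Mirrors real ZooKeeper: exists() may set a watch on a node that is not there yet.
    bool should_register_watch(Error response_error, bool is_exists_request)
    {
        if (response_error == Error::ZOK)
            return true;
        if (response_error == Error::ZNONODE && is_exists_request)
            return true;
        return false;
    }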

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 ADDINCL (

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 ADDINCL (

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 ADDINCL(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 ADDINCL(

View File

@@ -57,6 +57,7 @@ public:
     using Op = Operation<CompareInt, CompareInt>;
     using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>;
     using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>;
+
     using ArrayA = typename ColVecA::Container;
     using ArrayB = typename ColVecB::Container;

View File

@@ -6,6 +6,7 @@
 #include <Core/MySQL/PacketsProtocolText.h>
 #include <Core/MySQL/PacketsReplication.h>
 #include <Core/MySQL/MySQLReplication.h>
+#include <Poco/String.h>
 
 namespace DB
 {
@@ -132,11 +133,19 @@ void MySQLClient::ping()
     writeCommand(Command::COM_PING, "");
 }
 
-void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str)
+void MySQLClient::setBinlogChecksum(const String & binlog_checksum)
 {
-    /// Set binlog checksum to CRC32.
-    String checksum = "CRC32";
-    writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = '" + checksum + "'");
+    replication.setChecksumSignatureLength(Poco::toUpper(binlog_checksum) == "NONE" ? 0 : 4);
+}
+
+void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str, const String & binlog_checksum)
+{
+    /// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments:
+    /// Make a notice to the server that this client is checksum-aware.
+    /// It does not need the first fake Rotate necessary checksummed.
+    writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = 'CRC32'");
+    setBinlogChecksum(binlog_checksum);
 
     /// Set heartbeat 1s.
     UInt64 period_ns = (1 * 1e9);

View File

@@ -29,10 +29,12 @@ public:
     void disconnect();
     void ping();
 
+    void setBinlogChecksum(const String & binlog_checksum);
+
     /// Start replication stream by GTID.
     /// replicate_db: replication database schema, events from other databases will be ignored.
     /// gtid: executed gtid sets format like 'hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh:x-y'.
-    void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid);
+    void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid, const String & binlog_checksum);
 
     BinlogEventPtr readOneBinlogEvent(UInt64 milliseconds = 0);
     Position getPosition() const { return replication.getPosition(); }
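
A hedged usage sketch of the extended interface; the constructor arguments and GTID value are invented, the point is only the extra binlog_checksum parameter:

    MySQLClient client("127.0.0.1", 3306, "root", "password");  // hypothetical connection parameters
    client.connect();
    client.startBinlogDumpGTID(/*slave_id*/ 9004, /*replicate_db*/ "db",
                               /*gtid*/ "", /*binlog_checksum*/ "CRC32");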

View File

@@ -57,7 +57,6 @@ namespace MySQLReplication
         payload.readStrict(reinterpret_cast<char *>(&create_timestamp), 4);
         payload.readStrict(reinterpret_cast<char *>(&event_header_length), 1);
         assert(event_header_length == EVENT_HEADER_LENGTH);
-
         readStringUntilEOF(event_type_header_length, payload);
     }
@@ -745,7 +744,7 @@ namespace MySQLReplication
         // skip the generic response packets header flag.
         payload.ignore(1);
 
-        MySQLBinlogEventReadBuffer event_payload(payload);
+        MySQLBinlogEventReadBuffer event_payload(payload, checksum_signature_length);
 
         EventHeader event_header;
         event_header.parse(event_payload);
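
checksum_signature_length tells the event reader how many trailing bytes of each binlog event are checksum rather than payload: 4 for CRC32, 0 for NONE. A sketch of the accounting (illustration only):

    #include <cstddef>

    // With CRC32 each event carries a 4-byte trailing checksum that must be
    // excluded from parsing; with NONE nothing is appended.
    std::size_t event_body_size(std::size_t event_size, bool crc32_checksum)
    {
        const std::size_t checksum_signature_length = crc32_checksum ? 4 : 0;
        return event_size - checksum_signature_length;
    }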

View File

@@ -526,6 +526,8 @@ namespace MySQLReplication
         virtual BinlogEventPtr readOneEvent() = 0;
         virtual void setReplicateDatabase(String db) = 0;
         virtual void setGTIDSets(GTIDSets sets) = 0;
+        virtual void setChecksumSignatureLength(size_t checksum_signature_length_) = 0;
+
         virtual ~IFlavor() override = default;
     };
@@ -538,12 +540,14 @@ namespace MySQLReplication
         BinlogEventPtr readOneEvent() override { return event; }
         void setReplicateDatabase(String db) override { replicate_do_db = std::move(db); }
         void setGTIDSets(GTIDSets sets) override { position.gtid_sets = std::move(sets); }
+        void setChecksumSignatureLength(size_t checksum_signature_length_) override { checksum_signature_length = checksum_signature_length_; }
 
     private:
         Position position;
         BinlogEventPtr event;
         String replicate_do_db;
         std::shared_ptr<TableMapEvent> table_map;
+        size_t checksum_signature_length = 4;
 
         inline bool do_replicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); }
     };

View File

@@ -441,6 +441,8 @@ class IColumn;
     M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
     \
     M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
+    M(Bool, output_format_json_named_tuples_as_objects, false, "Serialize named tuple columns as JSON objects.", 0) \
+    M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
     \
     M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
     M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
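
The two new settings affect row-level JSON formats. With an invented two-row result holding a single column x, output_format_json_array_of_rows = 1 turns the usual newline-delimited JSONEachRow output

    {"x":1}
    {"x":2}

into a single JSON array:

    [
    {"x":1},
    {"x":2}
    ]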
@@ -517,7 +519,7 @@ struct Settings : public BaseSettings<SettingsTraits>
 };
 
 /*
- * User-specified file format settings for File and ULR engines.
+ * User-specified file format settings for File and URL engines.
 */
 
 DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)

View File

@@ -145,7 +145,7 @@ struct Decimal
     operator T () const { return value; }
 
     template <typename U>
-    U convertTo()
+    U convertTo() const
     {
         /// no IsDecimalNumber defined yet
         if constexpr (std::is_same_v<U, Decimal<Int32>> ||
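
Adding const here matters because the conversion is a read-only operation: without it, convertTo cannot be called through a const reference. Minimal illustration with a simplified stand-in type:

    struct D
    {
        long long value = 0;
        template <typename U> U convertTo() const { return static_cast<U>(value); }
    };

    // Compiles only because convertTo() is const-qualified.
    long long read(const D & d) { return d.convertTo<long long>(); }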

View File

@@ -304,7 +304,8 @@ int main(int argc, char ** argv)
         "user", boost::program_options::value<std::string>()->default_value("root"), "master user")(
         "password", boost::program_options::value<std::string>()->required(), "master password")(
         "gtid", boost::program_options::value<std::string>()->default_value(""), "executed GTID sets")(
-        "db", boost::program_options::value<std::string>()->required(), "replicate do db");
+        "db", boost::program_options::value<std::string>()->required(), "replicate do db")(
+        "binlog_checksum", boost::program_options::value<std::string>()->default_value("CRC32"), "master binlog_checksum");
 
     boost::program_options::variables_map options;
     boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
@@ -319,6 +320,7 @@ int main(int argc, char ** argv)
     auto master_password = options.at("password").as<DB::String>();
     auto gtid_sets = options.at("gtid").as<DB::String>();
     auto replicate_db = options.at("db").as<DB::String>();
+    auto binlog_checksum = options.at("binlog_checksum").as<String>();
 
     std::cerr << "Master Host: " << host << ", Port: " << port << ", User: " << master_user << ", Password: " << master_password
               << ", Replicate DB: " << replicate_db << ", GTID: " << gtid_sets << std::endl;
@@ -328,7 +330,7 @@ int main(int argc, char ** argv)
     /// Connect to the master.
     slave.connect();
-    slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets);
+    slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets, binlog_checksum);
 
     WriteBufferFromOStream cerr(std::cerr);

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 PEERDIR(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 PEERDIR(

View File

@@ -17,7 +17,7 @@ ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params &
     // Subtract one thread that we use for segmentation and one for
     // reading. After that, must have at least two threads left for
    // parsing. See the assertion below.
-    pool(std::max(2, params.max_threads - 2)),
+    pool(std::max(2, static_cast<int>(params.max_threads) - 2)),
     file_segmentation_engine(params.file_segmentation_engine)
 {
     // See comment above.
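
The cast fixes a classic unsigned-underflow trap: once max_threads became size_t, params.max_threads - 2 wraps around for values below 2, and std::max(2, ...) no longer clamps anything. A self-contained demonstration:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main()
    {
        std::size_t max_threads = 1;
        std::size_t wrapped = std::max<std::size_t>(2, max_threads - 2);    // 1 - 2 wraps to a huge value
        int clamped = std::max(2, static_cast<int>(max_threads) - 2);       // -1, clamped to 2 as intended
        std::printf("wrapped=%zu clamped=%d\n", wrapped, clamped);
    }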

View File

@@ -69,7 +69,7 @@ public:
         const InputProcessorCreator & input_processor_creator;
         const InputCreatorParams & input_creator_params;
         FormatFactory::FileSegmentationEngine file_segmentation_engine;
-        int max_threads;
+        size_t max_threads;
         size_t min_chunk_bytes;
     };

View File

@@ -10,6 +10,7 @@
 #include <Common/CurrentThread.h>
 #include <Common/setThreadName.h>
 #include <Common/ThreadPool.h>
+#include <Common/checkStackSize.h>
 #include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
 #include <Storages/StorageValues.h>
 #include <Storages/LiveView/StorageLiveView.h>
@@ -29,6 +30,8 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
     , context(context_)
     , query_ptr(query_ptr_)
 {
+    checkStackSize();
+
     /** TODO This is a very important line. At any insertion into the table one of streams should own lock.
       * Although now any insertion into the table is done via PushingToViewsBlockOutputStream,
       * but it's clear that here is not the best place for this functionality.
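
checkStackSize() is called here because pushing to materialized views can recurse: a view selecting from another view re-enters this constructor, and a deep chain would otherwise die with a stack overflow instead of a catchable error. The guard pattern in general form (hypothetical limit, not the real implementation, which measures actual stack usage):

    #include <stdexcept>

    void guard_recursion(int depth)
    {
        const int max_depth = 1000;  // hypothetical limit
        if (depth > max_depth)
            throw std::runtime_error("too deep: failing cleanly instead of overflowing the stack");
    }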

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 PEERDIR(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 PEERDIR(

View File

@@ -25,12 +25,13 @@ namespace DB
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
-    extern const int EMPTY_DATA_PASSED;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-    extern const int DUPLICATE_COLUMN;
     extern const int BAD_ARGUMENTS;
+    extern const int DUPLICATE_COLUMN;
+    extern const int EMPTY_DATA_PASSED;
+    extern const int LOGICAL_ERROR;
     extern const int NOT_FOUND_COLUMN_IN_BLOCK;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
 }
@@ -145,6 +146,20 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
     try
     {
         impl();
+
+        // Check that all columns now have the same size.
+        size_t new_size = column.size();
+        for (auto i : ext::range(1, ext::size(elems)))
+        {
+            const auto & element_column = extractElementColumn(column, i);
+            if (element_column.size() != new_size)
+            {
+                // This is not a logical error because it may work with
+                // user-supplied data.
+                throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
+                    "Cannot read a tuple because not all elements are present");
+            }
+        }
     }
     catch (...)
     {
@@ -213,37 +228,93 @@ void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const F
 void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
 {
-    writeChar('[', ostr);
-    for (const auto i : ext::range(0, ext::size(elems)))
+    if (settings.json.named_tuples_as_objects
+        && have_explicit_names)
     {
-        if (i != 0)
-            writeChar(',', ostr);
-        elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
+        writeChar('{', ostr);
+        for (const auto i : ext::range(0, ext::size(elems)))
+        {
+            if (i != 0)
+            {
+                writeChar(',', ostr);
+            }
+            writeJSONString(names[i], ostr, settings);
+            writeChar(':', ostr);
+            elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
+        }
+        writeChar('}', ostr);
+    }
+    else
+    {
+        writeChar('[', ostr);
+        for (const auto i : ext::range(0, ext::size(elems)))
+        {
+            if (i != 0)
+                writeChar(',', ostr);
+            elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
+        }
+        writeChar(']', ostr);
     }
-    writeChar(']', ostr);
 }
 
 void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
 {
-    const size_t size = elems.size();
-    assertChar('[', istr);
-
-    addElementSafe(elems, column, [&]
+    if (settings.json.named_tuples_as_objects
+        && have_explicit_names)
     {
-        for (const auto i : ext::range(0, size))
-        {
-            skipWhitespaceIfAny(istr);
-            if (i != 0)
-            {
-                assertChar(',', istr);
-                skipWhitespaceIfAny(istr);
-            }
-            elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
-        }
-    });
+        skipWhitespaceIfAny(istr);
+        assertChar('{', istr);
+        skipWhitespaceIfAny(istr);
 
-    skipWhitespaceIfAny(istr);
-    assertChar(']', istr);
+        addElementSafe(elems, column, [&]
+        {
+            // Require all elements but in arbitrary order.
+            for (auto i : ext::range(0, ext::size(elems)))
+            {
+                if (i > 0)
+                {
+                    skipWhitespaceIfAny(istr);
+                    assertChar(',', istr);
+                    skipWhitespaceIfAny(istr);
+                }
+
+                std::string name;
+                readDoubleQuotedString(name, istr);
+                skipWhitespaceIfAny(istr);
+                assertChar(':', istr);
+                skipWhitespaceIfAny(istr);
+
+                const size_t element_pos = getPositionByName(name);
+                auto & element_column = extractElementColumn(column, element_pos);
+                elems[element_pos]->deserializeAsTextJSON(element_column, istr, settings);
+            }
+        });
+
+        skipWhitespaceIfAny(istr);
+        assertChar('}', istr);
+    }
+    else
+    {
+        const size_t size = elems.size();
+        assertChar('[', istr);
+
+        addElementSafe(elems, column, [&]
+        {
+            for (const auto i : ext::range(0, size))
+            {
+                skipWhitespaceIfAny(istr);
+                if (i != 0)
+                {
+                    assertChar(',', istr);
+                    skipWhitespaceIfAny(istr);
+                }
+                elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
+            }
+        });
+
+        skipWhitespaceIfAny(istr);
+        assertChar(']', istr);
+    }
 }
 
 void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
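
Effect of the new code path, with invented data: for a column declared Tuple(a UInt8, s String) holding (1, 'x'), the default JSON form stays positional, while the setting switches to an object keyed by the explicit element names; on input the keys may then appear in any order:

    [1,"x"]            default serialization
    {"a":1,"s":"x"}    with output_format_json_named_tuples_as_objects = 1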

View File

@@ -466,75 +466,66 @@ struct WhichDataType
 {
     TypeIndex idx;
 
-    WhichDataType(TypeIndex idx_ = TypeIndex::Nothing)
-        : idx(idx_)
-    {}
-
-    WhichDataType(const IDataType & data_type)
-        : idx(data_type.getTypeId())
-    {}
-
-    WhichDataType(const IDataType * data_type)
-        : idx(data_type->getTypeId())
-    {}
-
-    WhichDataType(const DataTypePtr & data_type)
-        : idx(data_type->getTypeId())
-    {}
-
-    bool isUInt8() const { return idx == TypeIndex::UInt8; }
-    bool isUInt16() const { return idx == TypeIndex::UInt16; }
-    bool isUInt32() const { return idx == TypeIndex::UInt32; }
-    bool isUInt64() const { return idx == TypeIndex::UInt64; }
-    bool isUInt128() const { return idx == TypeIndex::UInt128; }
-    bool isUInt256() const { return idx == TypeIndex::UInt256; }
-    bool isUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64() || isUInt128() || isUInt256(); }
-    bool isNativeUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64(); }
-
-    bool isInt8() const { return idx == TypeIndex::Int8; }
-    bool isInt16() const { return idx == TypeIndex::Int16; }
-    bool isInt32() const { return idx == TypeIndex::Int32; }
-    bool isInt64() const { return idx == TypeIndex::Int64; }
-    bool isInt128() const { return idx == TypeIndex::Int128; }
-    bool isInt256() const { return idx == TypeIndex::Int256; }
-    bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128() || isInt256(); }
-    bool isNativeInt() const { return isInt8() || isInt16() || isInt32() || isInt64(); }
-
-    bool isDecimal32() const { return idx == TypeIndex::Decimal32; }
-    bool isDecimal64() const { return idx == TypeIndex::Decimal64; }
-    bool isDecimal128() const { return idx == TypeIndex::Decimal128; }
-    bool isDecimal256() const { return idx == TypeIndex::Decimal256; }
-    bool isDecimal() const { return isDecimal32() || isDecimal64() || isDecimal128() || isDecimal256(); }
-
-    bool isFloat32() const { return idx == TypeIndex::Float32; }
-    bool isFloat64() const { return idx == TypeIndex::Float64; }
-    bool isFloat() const { return isFloat32() || isFloat64(); }
-
-    bool isEnum8() const { return idx == TypeIndex::Enum8; }
-    bool isEnum16() const { return idx == TypeIndex::Enum16; }
-    bool isEnum() const { return isEnum8() || isEnum16(); }
-
-    bool isDate() const { return idx == TypeIndex::Date; }
-    bool isDateTime() const { return idx == TypeIndex::DateTime; }
-    bool isDateTime64() const { return idx == TypeIndex::DateTime64; }
-    bool isDateOrDateTime() const { return isDate() || isDateTime() || isDateTime64(); }
-
-    bool isString() const { return idx == TypeIndex::String; }
-    bool isFixedString() const { return idx == TypeIndex::FixedString; }
-    bool isStringOrFixedString() const { return isString() || isFixedString(); }
-
-    bool isUUID() const { return idx == TypeIndex::UUID; }
-    bool isArray() const { return idx == TypeIndex::Array; }
-    bool isTuple() const { return idx == TypeIndex::Tuple; }
-    bool isSet() const { return idx == TypeIndex::Set; }
-    bool isInterval() const { return idx == TypeIndex::Interval; }
-
-    bool isNothing() const { return idx == TypeIndex::Nothing; }
-    bool isNullable() const { return idx == TypeIndex::Nullable; }
-    bool isFunction() const { return idx == TypeIndex::Function; }
-    bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
-
-    bool IsBigIntOrDeimal() const { return isInt128() || isInt256() || isUInt256() || isDecimal256(); }
+    constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {}
+    constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {}
+    constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {}
+
+    // shared ptr -> is non-constexpr in gcc
+    WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {}
+
+    constexpr bool isUInt8() const { return idx == TypeIndex::UInt8; }
+    constexpr bool isUInt16() const { return idx == TypeIndex::UInt16; }
+    constexpr bool isUInt32() const { return idx == TypeIndex::UInt32; }
+    constexpr bool isUInt64() const { return idx == TypeIndex::UInt64; }
+    constexpr bool isUInt128() const { return idx == TypeIndex::UInt128; }
+    constexpr bool isUInt256() const { return idx == TypeIndex::UInt256; }
+    constexpr bool isUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64() || isUInt128() || isUInt256(); }
+    constexpr bool isNativeUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64(); }
+
+    constexpr bool isInt8() const { return idx == TypeIndex::Int8; }
+    constexpr bool isInt16() const { return idx == TypeIndex::Int16; }
+    constexpr bool isInt32() const { return idx == TypeIndex::Int32; }
+    constexpr bool isInt64() const { return idx == TypeIndex::Int64; }
+    constexpr bool isInt128() const { return idx == TypeIndex::Int128; }
+    constexpr bool isInt256() const { return idx == TypeIndex::Int256; }
+    constexpr bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128() || isInt256(); }
+    constexpr bool isNativeInt() const { return isInt8() || isInt16() || isInt32() || isInt64(); }
+
+    constexpr bool isDecimal32() const { return idx == TypeIndex::Decimal32; }
+    constexpr bool isDecimal64() const { return idx == TypeIndex::Decimal64; }
+    constexpr bool isDecimal128() const { return idx == TypeIndex::Decimal128; }
+    constexpr bool isDecimal256() const { return idx == TypeIndex::Decimal256; }
+    constexpr bool isDecimal() const { return isDecimal32() || isDecimal64() || isDecimal128() || isDecimal256(); }
+
+    constexpr bool isFloat32() const { return idx == TypeIndex::Float32; }
+    constexpr bool isFloat64() const { return idx == TypeIndex::Float64; }
+    constexpr bool isFloat() const { return isFloat32() || isFloat64(); }
+
+    constexpr bool isEnum8() const { return idx == TypeIndex::Enum8; }
+    constexpr bool isEnum16() const { return idx == TypeIndex::Enum16; }
+    constexpr bool isEnum() const { return isEnum8() || isEnum16(); }
+
+    constexpr bool isDate() const { return idx == TypeIndex::Date; }
+    constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; }
+    constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; }
+    constexpr bool isDateOrDateTime() const { return isDate() || isDateTime() || isDateTime64(); }
+
+    constexpr bool isString() const { return idx == TypeIndex::String; }
+    constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; }
+    constexpr bool isStringOrFixedString() const { return isString() || isFixedString(); }
+
+    constexpr bool isUUID() const { return idx == TypeIndex::UUID; }
+    constexpr bool isArray() const { return idx == TypeIndex::Array; }
+    constexpr bool isTuple() const { return idx == TypeIndex::Tuple; }
+    constexpr bool isSet() const { return idx == TypeIndex::Set; }
+    constexpr bool isInterval() const { return idx == TypeIndex::Interval; }
+
+    constexpr bool isNothing() const { return idx == TypeIndex::Nothing; }
+    constexpr bool isNullable() const { return idx == TypeIndex::Nullable; }
+    constexpr bool isFunction() const { return idx == TypeIndex::Function; }
+    constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
+
+    constexpr bool IsBigIntOrDeimal() const { return isInt128() || isInt256() || isUInt256() || isDecimal256(); }
 };
 
 /// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
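
Making the predicates constexpr lets type dispatch happen at compile time, for example inside static_assert or if constexpr. A self-contained analogue of the pattern:

    enum class TypeIndex { UInt64, String };

    struct Which
    {
        TypeIndex idx;
        constexpr Which(TypeIndex idx_) : idx(idx_) {}
        constexpr bool isUInt64() const { return idx == TypeIndex::UInt64; }
    };

    static_assert(Which(TypeIndex::UInt64).isUInt64(), "evaluated entirely at compile time");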

View File

@@ -1,4 +1,6 @@
 # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 PEERDIR(

View File

@@ -1,3 +1,5 @@
+OWNER(g:clickhouse)
+
 LIBRARY()
 
 PEERDIR(

View File

@@ -67,14 +67,14 @@ namespace
 }
 
-void tryAttachDictionary(const ASTPtr & query, DatabaseOrdinary & database, const String & metadata_path)
+void tryAttachDictionary(const ASTPtr & query, DatabaseOrdinary & database, const String & metadata_path, const Context & context)
 {
     auto & create_query = query->as<ASTCreateQuery &>();
     assert(create_query.is_dictionary);
     try
     {
         Poco::File meta_file(metadata_path);
-        auto config = getDictionaryConfigurationFromAST(create_query, database.getDatabaseName());
+        auto config = getDictionaryConfigurationFromAST(create_query, context, database.getDatabaseName());
         time_t modification_time = meta_file.getLastModified().epochTime();
         database.attachDictionary(create_query.table, DictionaryAttachInfo{query, config, modification_time});
     }
@@ -190,7 +190,7 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto
             auto create_query = query->as<const ASTCreateQuery &>();
             if (create_query.is_dictionary)
             {
-                tryAttachDictionary(query, *this, getMetadataPath() + name);
+                tryAttachDictionary(query, *this, getMetadataPath() + name, context);
 
                 /// Messages, so that it's not boring to wait for the server to load for a long time.
                 logAboutProgress(log, ++dictionaries_processed, total_dictionaries, watch);

View File

@@ -176,7 +176,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S
     /// Add a temporary repository containing the dictionary.
     /// We need this temp repository to try loading the dictionary before actually attaching it to the database.
     auto temp_repository = external_loader.addConfigRepository(std::make_unique<ExternalLoaderTempConfigRepository>(
-        getDatabaseName(), dictionary_metadata_tmp_path, getDictionaryConfigurationFromAST(query->as<const ASTCreateQuery &>())));
+        getDatabaseName(), dictionary_metadata_tmp_path, getDictionaryConfigurationFromAST(query->as<const ASTCreateQuery &>(), context)));
 
     bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true);
     if (!lazy_load)
@@ -186,7 +186,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S
         external_loader.load(dict_id.getInternalDictionaryName());
     }
 
-    auto config = getDictionaryConfigurationFromAST(query->as<const ASTCreateQuery &>());
+    auto config = getDictionaryConfigurationFromAST(query->as<const ASTCreateQuery &>(), context);
     attachDictionary(dictionary_name, DictionaryAttachInfo{query, config, time(nullptr)});
 
     SCOPE_EXIT({
         if (!succeeded)

View File

@@ -12,6 +12,7 @@
 #include <Common/quoteString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <IO/Operators.h>
 
 namespace DB
 {
@@ -19,6 +20,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int SYNC_MYSQL_USER_ACCESS_ERROR;
 }
 
 static std::unordered_map<String, String> fetchTablesCreateQuery(
@@ -64,6 +66,7 @@ static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entr
     return tables_in_db;
 }
 
+
 void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & connection)
 {
     Block header{
@@ -88,6 +91,29 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c
     executed_gtid_set = (*master_status.getByPosition(4).column)[0].safeGet<String>();
 }
 
+void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailover::Entry & connection)
+{
+    Block variables_header{
+        {std::make_shared<DataTypeString>(), "Variable_name"},
+        {std::make_shared<DataTypeString>(), "Value"}
+    };
+
+    const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'";
+    MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, DEFAULT_BLOCK_SIZE);
+
+    while (Block variables_block = variables_input.read())
+    {
+        ColumnPtr variables_name = variables_block.getByName("Variable_name").column;
+        ColumnPtr variables_value = variables_block.getByName("Value").column;
+
+        for (size_t index = 0; index < variables_block.rows(); ++index)
+        {
+            if (variables_name->getDataAt(index) == "binlog_checksum")
+                binlog_checksum = variables_value->getDataAt(index).toString();
+        }
+    }
+}
+
 static Block getShowMasterLogHeader(const String & mysql_version)
 {
     if (startsWith(mysql_version, "5."))
@@ -105,6 +131,49 @@ static Block getShowMasterLogHeader(const String & mysql_version)
     };
 }
 
+static bool checkSyncUserPrivImpl(mysqlxx::PoolWithFailover::Entry & connection, WriteBuffer & out)
+{
+    Block sync_user_privs_header
+    {
+        {std::make_shared<DataTypeString>(), "current_user_grants"}
+    };
+
+    String grants_query, sub_privs;
+    MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, DEFAULT_BLOCK_SIZE);
+    while (Block block = input.read())
+    {
+        for (size_t index = 0; index < block.rows(); ++index)
+        {
+            grants_query = (*block.getByPosition(0).column)[index].safeGet<String>();
+            out << grants_query << "; ";
+            sub_privs = grants_query.substr(0, grants_query.find(" ON "));
+            if (sub_privs.find("ALL PRIVILEGES") == std::string::npos)
+            {
+                if ((sub_privs.find("RELOAD") != std::string::npos and
+                    sub_privs.find("REPLICATION SLAVE") != std::string::npos and
+                    sub_privs.find("REPLICATION CLIENT") != std::string::npos))
+                    return true;
+            }
+            else
+            {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static void checkSyncUserPriv(mysqlxx::PoolWithFailover::Entry & connection)
+{
+    WriteBufferFromOwnString out;
+
+    if (!checkSyncUserPrivImpl(connection, out))
+        throw Exception("MySQL SYNC USER ACCESS ERR: mysql sync user needs "
+                        "at least GLOBAL PRIVILEGES:'RELOAD, REPLICATION SLAVE, REPLICATION CLIENT' "
+                        "and SELECT PRIVILEGE on MySQL Database."
+                        "But the SYNC USER grant query is: " + out.str(), ErrorCodes::SYNC_MYSQL_USER_ACCESS_ERROR);
+}
+
 bool MaterializeMetadata::checkBinlogFileExists(mysqlxx::PoolWithFailover::Entry & connection, const String & mysql_version) const
 {
     MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", getShowMasterLogHeader(mysql_version), DEFAULT_BLOCK_SIZE);
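
The privilege check above is a substring scan over SHOW GRANTS output: a grant line passes if it carries ALL PRIVILEGES, or all of RELOAD, REPLICATION SLAVE and REPLICATION CLIENT before its " ON " clause. The core predicate in isolation (condensed model of the logic above):

    #include <string>

    bool grant_is_sufficient(const std::string & grant_line)
    {
        const std::string privs = grant_line.substr(0, grant_line.find(" ON "));
        if (privs.find("ALL PRIVILEGES") != std::string::npos)
            return true;
        return privs.find("RELOAD") != std::string::npos
            && privs.find("REPLICATION SLAVE") != std::string::npos
            && privs.find("REPLICATION CLIENT") != std::string::npos;
    }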
@@ -167,6 +236,8 @@ MaterializeMetadata::MaterializeMetadata(
     const String & database, bool & opened_transaction, const String & mysql_version)
     : persistent_path(path_)
 {
+    checkSyncUserPriv(connection);
+
     if (Poco::File(persistent_path).exists())
     {
         ReadBufferFromFile in(persistent_path, DBMS_DEFAULT_BUFFER_SIZE);
@@ -193,6 +264,7 @@ MaterializeMetadata::MaterializeMetadata(
         locked_tables = true;
 
         fetchMasterStatus(connection);
+        fetchMasterVariablesValue(connection);
         connection->query("SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;").execute();
         connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute();

View File

@@ -34,10 +34,13 @@ struct MaterializeMetadata
     size_t data_version = 1;
     size_t meta_version = 2;
+    String binlog_checksum = "CRC32";
     std::unordered_map<String, String> need_dumping_tables;
 
     void fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & connection);
+    void fetchMasterVariablesValue(const mysqlxx::PoolWithFailover::Entry & connection);
+
     bool checkBinlogFileExists(mysqlxx::PoolWithFailover::Entry & connection, const String & mysql_version) const;
 
     void transaction(const MySQLReplication::Position & position, const std::function<void()> & fun);

Some files were not shown because too many files have changed in this diff.