mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Merge remote-tracking branch 'upstream/master' into improvement/diff-types-in-avg-weighted
This commit is contained in:
commit
fbb0e6e6aa
@ -2,8 +2,7 @@
|
||||
name: Documentation issue
|
||||
about: Report something incorrect or missing in documentation
|
||||
title: ''
|
||||
labels: documentation
|
||||
assignees: BayoNet
|
||||
labels: comp-documentation
|
||||
|
||||
---
|
||||
|
||||
|
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -196,7 +196,7 @@
|
||||
[submodule "contrib/rocksdb"]
|
||||
path = contrib/rocksdb
|
||||
url = https://github.com/facebook/rocksdb
|
||||
branch = v6.11.4
|
||||
branch = v6.14.5
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept> // for std::logic_error
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
NO_COMPILER_WARNINGS()
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL (GLOBAL clickhouse/base/pcg-random)
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
CFLAGS(-g0)
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(GLOBAL clickhouse/base/widechar_width)
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
RECURSE(
|
||||
common
|
||||
daemon
|
||||
|
@ -1,9 +1,9 @@
|
||||
# This strings autochanged from release_lib.sh:
|
||||
SET(VERSION_REVISION 54443)
|
||||
SET(VERSION_REVISION 54444)
|
||||
SET(VERSION_MAJOR 20)
|
||||
SET(VERSION_MINOR 12)
|
||||
SET(VERSION_MINOR 13)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH c53725fb1f846fda074347607ab582fbb9c6f7a1)
|
||||
SET(VERSION_DESCRIBE v20.12.1.1-prestable)
|
||||
SET(VERSION_STRING 20.12.1.1)
|
||||
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
|
||||
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
|
||||
SET(VERSION_STRING 20.13.1.1)
|
||||
# end of autochange
|
||||
|
2
contrib/cctz
vendored
2
contrib/cctz
vendored
@ -1 +1 @@
|
||||
Subproject commit 7a2db4ece6e0f1b246173cbdb62711ae258ee841
|
||||
Subproject commit 260ba195ef6c489968bae8c88c62a67cdac5ff9d
|
2
contrib/libunwind
vendored
2
contrib/libunwind
vendored
@ -1 +1 @@
|
||||
Subproject commit 198458b35f100da32bd3e74c2a3ce8d236db299b
|
||||
Subproject commit 7d78d3618910752c256b2b58c3895f4efea47fac
|
2
contrib/rocksdb
vendored
2
contrib/rocksdb
vendored
@ -1 +1 @@
|
||||
Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a
|
||||
Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd
|
@ -347,8 +347,9 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/builder.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/c.cc
|
||||
@ -394,6 +395,8 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/output_validator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/repair.cc
|
||||
@ -451,12 +454,12 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/configurable.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/db_options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
|
||||
@ -507,6 +510,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
|
||||
@ -515,6 +519,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
|
||||
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@ -1,5 +1,5 @@
|
||||
clickhouse (20.12.1.1) unstable; urgency=low
|
||||
clickhouse (20.13.1.1) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 05 Nov 2020 21:52:47 +0300
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
RUN apt-get update \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build
|
||||
ENV OUTPUT_DIR=/output
|
||||
ENV IGNORE='.*contrib.*'
|
||||
|
||||
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \
|
||||
RUN apt-get update && apt-get install cmake --yes --no-install-recommends
|
||||
|
||||
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \
|
||||
dpkg -i /package_folder/clickhouse-common-static_*.deb; \
|
||||
llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
|
||||
llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
|
||||
llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
|
||||
llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
|
||||
genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR}
|
||||
|
@ -15,6 +15,9 @@ stage=${stage:-}
|
||||
# empty parameter.
|
||||
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
|
||||
|
||||
# Run only matching tests.
|
||||
FASTTEST_FOCUS=${FASTTEST_FOCUS:-""}
|
||||
|
||||
FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}")
|
||||
FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
|
||||
FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
|
||||
@ -287,9 +290,11 @@ TESTS_TO_SKIP=(
|
||||
01322_ttest_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
@ -30,7 +30,7 @@ RUN apt-get update \
|
||||
tzdata \
|
||||
vim \
|
||||
wget \
|
||||
&& pip3 --no-cache-dir install clickhouse_driver scipy \
|
||||
&& pip3 --no-cache-dir install 'clickhouse-driver>=0.1.5' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
&& apt-get clean \
|
||||
|
@ -14,10 +14,12 @@ import string
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import logging
|
||||
import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
@ -46,7 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
@ -147,20 +150,21 @@ for i, s in enumerate(servers):
|
||||
|
||||
reportStageEnd('connect')
|
||||
|
||||
# Run drop queries, ignoring errors. Do this before all other activity, because
|
||||
# clickhouse_driver disconnects on error (this is not configurable), and the new
|
||||
# connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
pass
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
pass
|
||||
|
||||
reportStageEnd('drop-1')
|
||||
reportStageEnd('drop-1')
|
||||
|
||||
# Apply settings.
|
||||
# If there are errors, report them and continue -- maybe a new test uses a setting
|
||||
@ -172,12 +176,9 @@ reportStageEnd('drop-1')
|
||||
settings = root.findall('settings/*')
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
try:
|
||||
q = f"set {s.tag} = '{s.text}'"
|
||||
c.execute(q)
|
||||
print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
|
||||
c.settings[s.tag] = s.text
|
||||
|
||||
reportStageEnd('settings')
|
||||
|
||||
@ -195,37 +196,40 @@ for t in tables:
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
|
||||
# Run create and fill queries. We will run them simultaneously for both servers,
|
||||
# to save time.
|
||||
# The weird search is to keep the relative order of elements, which matters, and
|
||||
# etree doesn't support the appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
|
||||
# and temporary tables are destroyed. We want to be able to continue after some
|
||||
# errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
sys.exit(1)
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
threads = [Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
threads = [
|
||||
Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.start()
|
||||
|
||||
for t in threads:
|
||||
t.join()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
reportStageEnd('create')
|
||||
reportStageEnd('create')
|
||||
|
||||
# By default, test all queries.
|
||||
queries_to_run = range(0, len(test_queries))
|
||||
@ -404,7 +408,7 @@ print(f'profile-total\t{profile_total_seconds}')
|
||||
reportStageEnd('run')
|
||||
|
||||
# Run drop queries
|
||||
if not args.keep_tables:
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
|
@ -1,12 +1,12 @@
|
||||
# docker build -t yandex/clickhouse-stateful-test-with-coverage .
|
||||
FROM yandex/clickhouse-stateless-test
|
||||
FROM yandex/clickhouse-stateless-test-with-coverage
|
||||
|
||||
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
python3-requests
|
||||
python3-requests procps psmisc
|
||||
|
||||
COPY s3downloader /s3downloader
|
||||
COPY run.sh /run.sh
|
||||
|
@ -1,40 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
kill_clickhouse () {
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process"
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive"
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
}
|
||||
|
||||
wait_llvm_profdata () {
|
||||
while kill -0 "$(pgrep llvm-profdata-10)"
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
|
||||
sleep 3
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
merge_client_files_in_background () {
|
||||
client_files=$(ls /client_*profraw 2>/dev/null)
|
||||
if [ -n "$client_files" ]
|
||||
then
|
||||
llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw"
|
||||
rm "$client_files"
|
||||
fi
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
|
||||
@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
function start()
|
||||
{
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
timeout 120 service clickhouse-server start
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
start
|
||||
start_clickhouse
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
if ! /s3downloader --dataset-names $DATASETS; then
|
||||
@ -81,25 +66,20 @@ fi
|
||||
|
||||
chmod 777 -R /var/lib/clickhouse
|
||||
|
||||
while /bin/true; do
|
||||
merge_client_files_in_background
|
||||
sleep 2
|
||||
done &
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test"
|
||||
|
||||
kill_clickhouse
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
@ -109,15 +89,10 @@ fi
|
||||
# more idiologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
wait_llvm_profdata
|
||||
|
||||
sleep 3
|
||||
|
||||
wait_llvm_profdata # 100% merged all parts
|
||||
|
||||
|
||||
cp /*.profraw /profraw ||:
|
||||
|
@ -29,7 +29,7 @@ def dowload_with_progress(url, path):
|
||||
logging.info("Downloading from %s to temp path %s", url, path)
|
||||
for i in range(RETRIES_COUNT):
|
||||
try:
|
||||
with open(path, 'w') as f:
|
||||
with open(path, 'wb') as f:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_length = response.headers.get('content-length')
|
||||
|
@ -1,4 +1,4 @@
|
||||
# docker build -t yandex/clickhouse-stateless-with-coverage-test .
|
||||
# docker build -t yandex/clickhouse-stateless-test-with-coverage .
|
||||
# TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs)
|
||||
FROM yandex/clickhouse-test-base
|
||||
|
||||
@ -28,7 +28,9 @@ RUN apt-get update -y \
|
||||
lsof \
|
||||
unixodbc \
|
||||
wget \
|
||||
qemu-user-static
|
||||
qemu-user-static \
|
||||
procps \
|
||||
psmisc
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
|
@ -2,27 +2,41 @@
|
||||
|
||||
kill_clickhouse () {
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "Will try to send second kill signal for sure"
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
sleep 5
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
@ -35,7 +35,7 @@ RUN apt-get update \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
RUN pip3 install urllib3 testflows==1.6.62 docker-compose docker dicttoxml kazoo tzlocal
|
||||
RUN pip3 install urllib3 testflows==1.6.65 docker-compose docker dicttoxml kazoo tzlocal
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
ENV DOCKER_VERSION 17.09.1-ce
|
||||
|
@ -2317,4 +2317,10 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
|
||||
|
||||
Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format.
|
||||
|
||||
Default value: `\N`.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
70
docs/en/operations/system-tables/replicated_fetches.md
Normal file
70
docs/en/operations/system-tables/replicated_fetches.md
Normal file
@ -0,0 +1,70 @@
|
||||
# system.replicated_fetches {#system_tables-replicated_fetches}
|
||||
|
||||
Contains information about currently running background fetches.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
|
||||
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
|
||||
|
||||
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since showing currently running background fetches started.
|
||||
|
||||
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
|
||||
|
||||
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of showing currently running background fetches.
|
||||
|
||||
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of showing currently running background fetches.
|
||||
|
||||
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
|
||||
|
||||
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
|
||||
|
||||
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.
|
||||
|
||||
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
|
||||
|
||||
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
|
||||
|
||||
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
|
||||
|
||||
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
|
||||
|
||||
- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
|
||||
|
||||
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.
|
||||
|
||||
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
database: default
|
||||
table: t
|
||||
elapsed: 7.243039876
|
||||
progress: 0.41832135995612835
|
||||
result_part_name: all_0_0_0
|
||||
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
|
||||
partition_id: all
|
||||
total_size_bytes_compressed: 1052783726
|
||||
bytes_read_compressed: 440401920
|
||||
source_replica_path: /clickhouse/test/t/replicas/1
|
||||
source_replica_hostname: node1
|
||||
source_replica_port: 9009
|
||||
interserver_scheme: http
|
||||
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
|
||||
to_detached: 0
|
||||
thread_id: 54
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->
|
@ -1,42 +1,42 @@
|
||||
# ClickHouse obfuscator
|
||||
|
||||
Simple tool for table data obfuscation.
|
||||
|
||||
It reads input table and produces output table, that retain some properties of input, but contains different data.
|
||||
It allows to publish almost real production data for usage in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
|
||||
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, NULLs;
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
|
||||
- date component of DateTime values;
|
||||
- UTF-8 validity of string values;
|
||||
- string values continue to look somewhat natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregation and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
|
||||
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
|
||||
|
||||
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
|
||||
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
|
||||
|
||||
It may retain some data you don't want to publish.
|
||||
|
||||
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
|
||||
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
So, the user will be able to count exact ratio of mobile traffic.
|
||||
|
||||
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
|
||||
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
|
||||
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
|
||||
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
|
||||
|
||||
This tool works fine only with reasonable amount of data (at least 1000s of rows).
|
||||
# ClickHouse obfuscator
|
||||
|
||||
A simple tool for table data obfuscation.
|
||||
|
||||
It reads an input table and produces an output table, that retains some properties of input, but contains different data.
|
||||
It allows publishing almost real production data for usage in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under the condition on the value of another column;
|
||||
- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of the length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, `NULL`s;
|
||||
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
|
||||
- date component of `DateTime` values;
|
||||
|
||||
- UTF-8 validity of string values;
|
||||
- string values look natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregatio, and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.
|
||||
Some transformations are one to one and could be reversed, so you need to have a large seed and keep it in secret.
|
||||
|
||||
It uses some cryptographic primitives to transform data but from the cryptographic point of view, it doesn't do it properly, that is why you should not consider the result as secure unless you have another reason. The result may retain some data you don't want to publish.
|
||||
|
||||
|
||||
It always leaves 0, 1, -1 numbers, dates, lengths of arrays, and null flags exactly as in source data.
|
||||
For example, you have a column `IsMobile` in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
|
||||
So, the user will be able to count the exact ratio of mobile traffic.
|
||||
|
||||
Let's give another example. When you have some private data in your table, like user email and you don't want to publish any single email address.
|
||||
If your table is large enough and contains multiple different emails and no email has a very high frequency than all others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them.
|
||||
You should look at the working algorithm of this tool works, and fine-tune its command line parameters.
|
||||
|
||||
This tool works fine only with an average amount of data (at least 1000s of rows).
|
||||
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
|
||||
|
||||
Now you can use the `groupArray` function to create an array from the `y` column:
|
||||
|
||||
``` sql
|
||||
|
@ -0,0 +1,37 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
|
||||
Use it for tests or to process columns of types `AggregateFunction` and `AggregationgMergeTree`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string).
|
||||
- `column_n` — The column to translate it into the function as it's argument. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
Returns the result of the aggregation for your input rows. The return type will be the same as the return type of function, that `initializeAgregation` takes as first argument.
|
||||
For example for functions with the suffix `State` the return type will be `AggregateFunction`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Result:
|
||||
|
||||
┌─uniqMerge(state)─┐
|
||||
│ 3 │
|
||||
└──────────────────┘
|
@ -535,18 +535,7 @@ dateDiff('unit', startdate, enddate, [timezone])
|
||||
|
||||
- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
|
||||
Supported values:
|
||||
|
||||
| unit |
|
||||
| ---- |
|
||||
|second |
|
||||
|minute |
|
||||
|hour |
|
||||
|day |
|
||||
|week |
|
||||
|month |
|
||||
|quarter |
|
||||
|year |
|
||||
Supported values: second, minute, hour, day, week, month, quarter, year.
|
||||
|
||||
- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
|
381
docs/en/sql-reference/functions/encryption-functions.md
Normal file
381
docs/en/sql-reference/functions/encryption-functions.md
Normal file
@ -0,0 +1,381 @@
|
||||
---
|
||||
toc_priority: 67
|
||||
toc_title: Encryption
|
||||
---
|
||||
|
||||
# Encryption functions {#encryption-functions}
|
||||
|
||||
These functions implement encryption and decryption of data with AES (Advanced Encryption Standard) algorithm.
|
||||
|
||||
Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128-`, `-196-`, and `-256-` modes respectively.
|
||||
|
||||
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
|
||||
|
||||
Note that these functions work slowly.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
This function encrypts data using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm` mode and with `aad`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
|
||||
Supported encryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
This function decrypts data using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
||||
Supported decryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->
|
@ -306,3 +306,67 @@ execute_native_thread_routine
|
||||
start_thread
|
||||
clone
|
||||
```
|
||||
## tid {#tid}
|
||||
|
||||
Returns id of the thread, in which current [Block](https://clickhouse.tech/docs/en/development/architecture/#block) is processed.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tid()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT tid();
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─tid()─┐
|
||||
│ 3878 │
|
||||
└───────┘
|
||||
```
|
||||
## logTrace {#logtrace}
|
||||
|
||||
Emits trace log message to server log for each [Block](https://clickhouse.tech/docs/en/development/architecture/#block).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
logTrace('message')
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Always returns 0.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT logTrace('logTrace message');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─logTrace('logTrace message')─┐
|
||||
│ 0 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) <!--hide-->
|
||||
|
@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp
|
||||
|
||||
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above).
|
||||
|
||||
For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
For example:
|
||||
|
||||
- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`.
|
||||
- `cutToFirstSignificantSubdomain('tr') = ''`.
|
||||
|
||||
### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww}
|
||||
|
||||
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www".
|
||||
|
||||
For example:
|
||||
|
||||
- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('www.tr') = 'www.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('tr') = ''`.
|
||||
|
||||
### port(URL\[, default_port = 0\]) {#port}
|
||||
|
||||
|
@ -20,7 +20,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
|
||||
|
@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
|
||||
|
||||
### Drawbacks {#drawbacks}
|
||||
|
||||
Queries that use `FINAL` are executed not as fast as similar queries that don’t, because:
|
||||
Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:
|
||||
|
||||
- Query is executed in a single thread and data is merged during query execution.
|
||||
- Data is merged during query execution.
|
||||
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
|
||||
|
||||
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##}
|
||||
|
@ -6,7 +6,7 @@ toc_title: GROUP BY
|
||||
|
||||
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
|
||||
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”.
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
|
||||
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
|
||||
- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.
|
||||
|
||||
@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v
|
||||
|
||||
If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value.
|
||||
|
||||
## WITH ROLLUP Modifier {#with-rollup-modifier}
|
||||
|
||||
`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table.
|
||||
|
||||
The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression.
|
||||
|
||||
In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
As `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (and `day` column is filled with zeros);
|
||||
- `GROUP BY year` (now `month, day` columns are both filled with zeros);
|
||||
- and totals (and all three key expression columns are zeros).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## WITH CUBE Modifier {#with-cube-modifier}
|
||||
|
||||
`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
|
||||
|
||||
In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
As `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- and totals.
|
||||
|
||||
Columns, excluded from `GROUP BY`, are filled with zeros.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## WITH TOTALS Modifier {#with-totals-modifier}
|
||||
|
||||
If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty lines), and columns of aggregate functions with the values calculated across all the rows (the “total” values).
|
||||
@ -88,8 +236,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
However, in contrast to standard SQL, if the table doesn’t have any rows (either there aren’t any at all, or there aren’t any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions.
|
||||
|
||||
As opposed to MySQL (and conforming to standard SQL), you can’t get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the ‘any’ aggregate function (get the first encountered value) or ‘min/max’.
|
||||
|
||||
Example:
|
||||
@ -105,10 +251,6 @@ GROUP BY domain
|
||||
|
||||
For every different key value encountered, `GROUP BY` calculates a set of aggregate function values.
|
||||
|
||||
`GROUP BY` is not supported for array columns.
|
||||
|
||||
A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`.
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
Aggregation is one of the most important features of a column-oriented DBMS, and thus it’s implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.
|
||||
|
@ -20,7 +20,7 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
|
||||
|
||||
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
|
||||
## SELECT modifiers {#select-modifiers}
|
||||
|
||||
You can use the following modifiers in `SELECT` queries.
|
||||
|
||||
### APPLY {#apply-modifier}
|
||||
|
||||
Allows you to invoke some function for each row returned by an outer table expression of a query.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
|
||||
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
|
||||
SELECT * APPLY(sum) FROM columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
|
||||
│ 220 │ 18 │ 347 │
|
||||
└────────┴────────┴────────┘
|
||||
```
|
||||
|
||||
### EXCEPT {#except-modifier}
|
||||
|
||||
Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * EXCEPT (i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌──j─┬───k─┐
|
||||
│ 10 │ 324 │
|
||||
│ 8 │ 23 │
|
||||
└────┴─────┘
|
||||
```
|
||||
|
||||
### REPLACE {#replace-modifier}
|
||||
|
||||
Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
|
||||
|
||||
This modifier does not change the names or order of columns. However, it can change the value and the value type.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌───i─┬──j─┬───k─┐
|
||||
│ 101 │ 10 │ 324 │
|
||||
│ 121 │ 8 │ 23 │
|
||||
└─────┴────┴─────┘
|
||||
```
|
||||
|
||||
### Modifier Combinations {#modifier-combinations}
|
||||
|
||||
You can use each modifier separately or combine them.
|
||||
|
||||
**Examples:**
|
||||
|
||||
Using the same modifier multiple times.
|
||||
|
||||
``` sql
|
||||
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
|
||||
│ 2 │ 3 │
|
||||
└──────────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
Using multiple modifiers in a single query.
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(plus(i, 1))─┬─sum(k)─┐
|
||||
│ 222 │ 347 │
|
||||
└─────────────────┴────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
|
||||
<!--hide-->
|
||||
|
@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
Crear [diccionario externo](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) con dado [estructura](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [fuente](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [diseño](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) y [vida](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
ایجاد [فرهنگ لغت خارجی](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) با توجه به [ساختار](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [متن](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [طرحبندی](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) و [طول عمر](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
Crée [externe dictionnaire](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) avec le [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [disposition](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) et [vie](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
作成 [外部辞書](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) 与えられたと [構造](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [ソース](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [レイアウト](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) と [生涯](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
@ -2187,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
|
||||
|
||||
Значение по умолчанию: `1`.
|
||||
|
||||
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
|
||||
|
||||
Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV.
|
||||
|
||||
Значение по умолчанию: `\N`.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
|
||||
|
70
docs/ru/operations/system-tables/replicated_fetches.md
Normal file
70
docs/ru/operations/system-tables/replicated_fetches.md
Normal file
@ -0,0 +1,70 @@
|
||||
# system.replicated_fetches {#system_tables-replicated_fetches}
|
||||
|
||||
Содержит информацию о выполняемых в данный момент фоновых операциях скачивания кусков данных с других реплик.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
|
||||
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
|
||||
|
||||
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — время, прошедшее от момента начала скачивания куска, в секундах.
|
||||
|
||||
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — доля выполненной работы от 0 до 1.
|
||||
|
||||
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — имя скачиваемого куска.
|
||||
|
||||
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к скачиваемому куску.
|
||||
|
||||
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции.
|
||||
|
||||
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — общий размер сжатой информации в скачиваемом куске в байтах.
|
||||
|
||||
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатой информации, считанной из скачиваемого куска, в байтах.
|
||||
|
||||
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к исходной реплике.
|
||||
|
||||
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста исходной реплики.
|
||||
|
||||
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — номер порта исходной реплики.
|
||||
|
||||
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — имя межсерверной схемы.
|
||||
|
||||
- `URI` ([String](../../sql-reference/data-types/string.md)) — универсальный идентификатор ресурса.
|
||||
|
||||
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на использование выражения `TO DETACHED` в текущих фоновых операциях.
|
||||
|
||||
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор потока.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
database: default
|
||||
table: t
|
||||
elapsed: 7.243039876
|
||||
progress: 0.41832135995612835
|
||||
result_part_name: all_0_0_0
|
||||
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
|
||||
partition_id: all
|
||||
total_size_bytes_compressed: 1052783726
|
||||
bytes_read_compressed: 440401920
|
||||
source_replica_path: /clickhouse/test/t/replicas/1
|
||||
source_replica_hostname: node1
|
||||
source_replica_port: 9009
|
||||
interserver_scheme: http
|
||||
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
|
||||
to_detached: 0
|
||||
thread_id: 54
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->
|
43
docs/ru/operations/utilities/clickhouse-obfuscator.md
Normal file
43
docs/ru/operations/utilities/clickhouse-obfuscator.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Обфускатор ClickHouse
|
||||
|
||||
Простой инструмент для обфускации табличных данных.
|
||||
|
||||
Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные.
|
||||
|
||||
Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность.
|
||||
|
||||
Обфускатор предназначен для сохранения следующих свойств данных:
|
||||
- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов;
|
||||
- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца;
|
||||
- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой;
|
||||
- вероятностное распределение длины строк;
|
||||
- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`;
|
||||
- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков;
|
||||
|
||||
- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой;
|
||||
- дату из значений `DateTime`;
|
||||
|
||||
- кодировка UTF-8 значений строки;
|
||||
- строковые значения выглядят естественным образом.
|
||||
|
||||
|
||||
Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д.
|
||||
|
||||
Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором.
|
||||
|
||||
Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете.
|
||||
|
||||
|
||||
Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать.
|
||||
|
||||
|
||||
Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги.
|
||||
Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение.
|
||||
|
||||
Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика.
|
||||
|
||||
Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать.
|
||||
Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них.
|
||||
В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки.
|
||||
|
||||
Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк).
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`.
|
||||
|
||||
Теперь с помощью функции `groupArray` сформируем массив из столбца `y`:
|
||||
|
||||
``` sql
|
||||
|
@ -0,0 +1,40 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Инициализирует агрегацию для введеных строчек. Предназначена для функций с суффиксом `State`.
|
||||
Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregationgMergeTree`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string).
|
||||
- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAgregation`.
|
||||
|
||||
Пример:
|
||||
|
||||
Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Результат:
|
||||
|
||||
┌─uniqMerge(state)─┐
|
||||
│ 3 │
|
||||
└──────────────────┘
|
382
docs/ru/sql-reference/functions/encryption-functions.md
Normal file
382
docs/ru/sql-reference/functions/encryption-functions.md
Normal file
@ -0,0 +1,382 @@
|
||||
---
|
||||
toc_priority: 67
|
||||
toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448\u0438\u0444\u0440\u043e\u0432\u0430\u043d\u0438\u044f"
|
||||
---
|
||||
|
||||
# Функции шифрования {#encryption-functions}
|
||||
|
||||
Даннвые функции реализуют шифрование и расшифровку данных с помощью AES (Advanced Encryption Standard) алгоритма.
|
||||
|
||||
Длина ключа зависит от режима шифрования. Он может быть длинной в 16, 24 и 32 байта для режимов шифрования `-128-`, `-196-` и `-256-` соответственно.
|
||||
|
||||
Длина инициализирующего вектора всегда 16 байт (лишнии байты игнорируются).
|
||||
|
||||
Обратите внимание, что эти функции работают медленно.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
Функция поддерживает шифрование данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов необязателен. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — дополнительные аутентифицированные данные. Не шифруются, но влияют на расшифровку. Параметр работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Пример без `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример с `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример в режиме `-gcm`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример в режиме `-gcm` и с `aad`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
|
||||
|
||||
Функция поддерживает шифрофание данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Пример без `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример с `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
Функция поддерживает расшифровку данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов опциональный. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — дополнительные аутентифицированные данные. Текст не будет расшифрован, если это значение неверно. Работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
|
||||
|
||||
Функция поддерживает расшифровку данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) <!--hide-->
|
@ -306,3 +306,68 @@ execute_native_thread_routine
|
||||
start_thread
|
||||
clone
|
||||
```
|
||||
|
||||
## tid {#tid}
|
||||
|
||||
Возвращает id потока, в котором обрабатывается текущий [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
tid()
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Id текущего потока. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT tid();
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─tid()─┐
|
||||
│ 3878 │
|
||||
└───────┘
|
||||
```
|
||||
## logTrace {#logtrace}
|
||||
|
||||
Выводит сообщение в лог сервера для каждого [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
logTrace('message')
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `message` — сообщение, которое отправляется в серверный лог. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Всегда возвращает 0.
|
||||
|
||||
**Example**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT logTrace('logTrace message');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─logTrace('logTrace message')─┐
|
||||
│ 0 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) <!--hide-->
|
@ -16,7 +16,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
@ -27,5 +27,5 @@ LIFETIME([MIN val1] MAX val2)
|
||||
|
||||
Смотрите [Внешние словари](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary)
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary)
|
||||
<!--hide-->
|
@ -27,9 +27,9 @@ toc_title: FROM
|
||||
|
||||
### Недостатки {#drawbacks}
|
||||
|
||||
Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что:
|
||||
Запросы, которые используют `FINAL` выполняются немного медленее, чем аналогичные запросы без него, потому что:
|
||||
|
||||
- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса.
|
||||
- Данные мёржатся во время выполнения запроса.
|
||||
- Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе.
|
||||
|
||||
**В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##}
|
||||
|
@ -43,6 +43,153 @@ toc_title: GROUP BY
|
||||
|
||||
Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением.
|
||||
|
||||
## Модификатор WITH ROLLUP {#with-rollup-modifier}
|
||||
|
||||
Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (а колонка `day` заполнена нулями);
|
||||
- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями);
|
||||
- и общий итог (все три колонки с ключевыми выражениями заполнены нулями).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## Модификатор WITH CUBE {#with-cube-modifier}
|
||||
|
||||
Модификатор `WITH CUBE` применятеся для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- и общий итог.
|
||||
|
||||
Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## Модификатор WITH TOTALS {#with-totals-modifier}
|
||||
|
||||
Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения).
|
||||
@ -86,8 +233,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций.
|
||||
|
||||
В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max.
|
||||
|
||||
Пример:
|
||||
@ -103,10 +248,6 @@ GROUP BY domain
|
||||
|
||||
GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций.
|
||||
|
||||
Не поддерживается GROUP BY по столбцам-массивам.
|
||||
|
||||
Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`.
|
||||
|
||||
## Детали реализации {#implementation-details}
|
||||
|
||||
Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки.
|
||||
|
@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
|
@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A"
|
||||
|
||||
ClickHouse体验还有如下:
|
||||
[ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse)
|
||||
实例托管 [Yandex云](https://cloud.yandex.com/).
|
||||
更多信息 [云提供商](../commercial/cloud.md).
|
||||
实例托管 [Yandex云](https://cloud.yandex.com/)。
|
||||
更多信息 [云提供商](../commercial/cloud.md)。
|
||||
|
||||
ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的.
|
||||
体验平台后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。
|
||||
体验平台也同样提供了ClickHouse HTTPS服务端口。
|
||||
|
||||
您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机
|
||||
有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md).
|
||||
您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。
|
||||
有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。
|
||||
|
||||
| 参数 | 值 |
|
||||
|:---------|:--------------------------------------|
|
||||
|
@ -33,10 +33,10 @@ ClickHouse 收集的指标项:
|
||||
- 服务用于计算的资源占用的各种指标。
|
||||
- 关于查询处理的常见统计信息。
|
||||
|
||||
可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) ,[系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。
|
||||
可以在[系统指标](system-tables/metrics.md#system_tables-metrics),[系统事件](system-tables/events.md#system_tables-events)以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)等系统表查看所有的指标项。
|
||||
|
||||
可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。
|
||||
可以配置ClickHouse向[Graphite](https://github.com/graphite-project)推送监控信息并导入指标。参考[Graphite监控](server-configuration-parameters/settings.md#server_configuration_parameters-graphite)配置文件。在配置指标导出之前,需要参考[Graphite官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建Graphite服务。
|
||||
|
||||
此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。
|
||||
此外,您可以通过HTTP API监视服务器可用性。将HTTP GET请求发送到`/ping`。如果服务器可用,它将以 `200 OK` 响应。
|
||||
|
||||
要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。
|
||||
要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回`200 OK`。 如果副本滞后,请求将返回`503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
我们使用RoaringBitmap实际存储位图对象,当基数小于或等于32时,它使用Set保存。当基数大于32时,它使用RoaringBitmap保存。这也是为什么低基数集的存储更快的原因。
|
||||
|
||||
有关RoaringBitmap的更多信息,请参阅:[呻吟声](https://github.com/RoaringBitmap/CRoaring)。
|
||||
有关RoaringBitmap的更多信息,请参阅:[RoaringBitmap](https://github.com/RoaringBitmap/CRoaring)。
|
||||
|
||||
## bitmapBuild {#bitmapbuild}
|
||||
|
||||
|
@ -259,5 +259,5 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
1
programs/server/config.d/logging_no_rotate.xml
Symbolic link
1
programs/server/config.d/logging_no_rotate.xml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tests/config/config.d/logging_no_rotate.xml
|
@ -11,6 +11,9 @@
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<!-- Rotation policy
|
||||
See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
|
||||
-->
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
|
||||
|
@ -1,6 +1,7 @@
|
||||
<html> <!-- TODO If I write DOCTYPE HTML something changes but I don't know what. -->
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<link rel="icon" href="">
|
||||
<title>ClickHouse Query</title>
|
||||
|
||||
<!-- Code Style:
|
||||
@ -21,26 +22,11 @@
|
||||
|
||||
<!-- Development Roadmap:
|
||||
|
||||
1. Add indication that the query was sent and when the query has been finished.
|
||||
Do not use any animated spinners. Just a text or check mark.
|
||||
Eliminate race conditions (results from the previous query should be ignored on arrival, the previous request should be cancelled).
|
||||
|
||||
2. Support readonly servers.
|
||||
1. Support readonly servers.
|
||||
Check if readonly = 1 (with SELECT FROM system.settings) to avoid sending settings. It can be done once on address/credentials change.
|
||||
It can be done in background, e.g. wait 100 ms after address/credentials change and do the check.
|
||||
Also it can provide visual indication that credentials are correct.
|
||||
|
||||
3. Add history in localstorage. Integrate with history API.
|
||||
There can be a counter in localstorage, that will be appended to location #fragment.
|
||||
The 'back', 'forward' buttons in browser should work.
|
||||
Also there should be UI element to list all the queries from history and select from the list.
|
||||
|
||||
4. Trivial sharing capabilities.
|
||||
Sharing is only possible when system.query_log is accessible. Read the X-ClickHouse-QueryId from the response.
|
||||
Share button will: - emit SYSTEM FLUSH LOGS if not readonly; - find the query in the query_log;
|
||||
- generate an URL with the query id and: server address if not equal to the URL's host; user name if not default;
|
||||
indication that password should be entered in case of non-empty password.
|
||||
|
||||
-->
|
||||
|
||||
<style type="text/css">
|
||||
@ -273,6 +259,22 @@
|
||||
{
|
||||
color: var(--null-color);
|
||||
}
|
||||
|
||||
#hourglass
|
||||
{
|
||||
display: none;
|
||||
padding-left: 1rem;
|
||||
font-size: 110%;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
#check-mark
|
||||
{
|
||||
display: none;
|
||||
padding-left: 1rem;
|
||||
font-size: 110%;
|
||||
color: #080;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
@ -286,6 +288,8 @@
|
||||
<div id="run_div">
|
||||
<button class="shadow" id="run">Run</button>
|
||||
<span class="hint"> (Ctrl+Enter)</span>
|
||||
<span id="hourglass">⧗</span>
|
||||
<span id="check-mark">✔</span>
|
||||
<span id="stats"></span>
|
||||
<span id="toggle-dark">🌑</span><span id="toggle-light">🌞</span>
|
||||
</div>
|
||||
@ -299,50 +303,117 @@
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
/// Incremental request number. When response is received,
|
||||
/// if it's request number does not equal to the current request number, response will be ignored.
|
||||
/// This is to avoid race conditions.
|
||||
var request_num = 0;
|
||||
|
||||
/// Save query in history only if it is different.
|
||||
var previous_query = '';
|
||||
|
||||
/// Substitute the address of the server where the page is served.
|
||||
if (location.protocol != 'file:') {
|
||||
document.getElementById('url').value = location.origin;
|
||||
}
|
||||
|
||||
function post()
|
||||
/// Substitute user name if it's specified in the query string
|
||||
var user_from_url = (new URL(window.location)).searchParams.get('user');
|
||||
if (user_from_url) {
|
||||
document.getElementById('user').value = user_from_url;
|
||||
}
|
||||
|
||||
function postImpl(posted_request_num, query)
|
||||
{
|
||||
/// TODO: Avoid race condition on subsequent requests when responses may come out of order.
|
||||
/// TODO: Check if URL already contains query string (append parameters).
|
||||
|
||||
var user = document.getElementById('user').value;
|
||||
var password = document.getElementById('password').value;
|
||||
|
||||
var url = document.getElementById('url').value +
|
||||
/// Ask server to allow cross-domain requests.
|
||||
'?add_http_cors_header=1' +
|
||||
'&user=' + encodeURIComponent(document.getElementById('user').value) +
|
||||
'&password=' + encodeURIComponent(document.getElementById('password').value) +
|
||||
'&user=' + encodeURIComponent(user) +
|
||||
'&password=' + encodeURIComponent(password) +
|
||||
'&default_format=JSONCompact' +
|
||||
/// Safety settings to prevent results that browser cannot display.
|
||||
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
|
||||
|
||||
var query = document.getElementById('query').value;
|
||||
var xhr = new XMLHttpRequest;
|
||||
|
||||
xhr.open('POST', url, true);
|
||||
xhr.send(query);
|
||||
|
||||
xhr.onreadystatechange = function()
|
||||
{
|
||||
if (this.readyState === XMLHttpRequest.DONE) {
|
||||
if (this.status === 200) {
|
||||
var json;
|
||||
try { json = JSON.parse(this.response); } catch (e) {}
|
||||
if (json !== undefined && json.statistics !== undefined) {
|
||||
renderResult(json);
|
||||
} else {
|
||||
renderUnparsedResult(this.response);
|
||||
}
|
||||
} else {
|
||||
/// TODO: Proper rendering of network errors.
|
||||
renderError(this.response);
|
||||
if (posted_request_num != request_num) {
|
||||
return;
|
||||
} else if (this.readyState === XMLHttpRequest.DONE) {
|
||||
renderResponse(this.status, this.response);
|
||||
|
||||
/// The query is saved in browser history (in state JSON object)
|
||||
/// as well as in URL fragment identifier.
|
||||
if (query != previous_query) {
|
||||
previous_query = query;
|
||||
var title = "ClickHouse Query: " + query;
|
||||
history.pushState(
|
||||
{
|
||||
query: query,
|
||||
status: this.status,
|
||||
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
|
||||
},
|
||||
title,
|
||||
window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query));
|
||||
document.title = title;
|
||||
}
|
||||
} else {
|
||||
//console.log(this);
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('check-mark').style.display = 'none';
|
||||
document.getElementById('hourglass').style.display = 'inline';
|
||||
|
||||
xhr.send(query);
|
||||
}
|
||||
|
||||
function renderResponse(status, response) {
|
||||
document.getElementById('hourglass').style.display = 'none';
|
||||
|
||||
if (status === 200) {
|
||||
var json;
|
||||
try { json = JSON.parse(response); } catch (e) {}
|
||||
if (json !== undefined && json.statistics !== undefined) {
|
||||
renderResult(json);
|
||||
} else {
|
||||
renderUnparsedResult(response);
|
||||
}
|
||||
document.getElementById('check-mark').style.display = 'inline';
|
||||
} else {
|
||||
/// TODO: Proper rendering of network errors.
|
||||
renderError(response);
|
||||
}
|
||||
}
|
||||
|
||||
window.onpopstate = function(event) {
|
||||
if (!event.state) {
|
||||
return;
|
||||
}
|
||||
document.getElementById('query').value = event.state.query;
|
||||
if (!event.state.response) {
|
||||
clear();
|
||||
return;
|
||||
}
|
||||
renderResponse(event.state.status, event.state.response);
|
||||
};
|
||||
|
||||
if (window.location.hash) {
|
||||
document.getElementById('query').value = window.atob(window.location.hash.substr(1));
|
||||
}
|
||||
|
||||
function post()
|
||||
{
|
||||
++request_num;
|
||||
var query = document.getElementById('query').value;
|
||||
postImpl(request_num, query);
|
||||
}
|
||||
|
||||
document.getElementById('run').onclick = function()
|
||||
@ -350,7 +421,7 @@
|
||||
post();
|
||||
}
|
||||
|
||||
document.getElementById('query').onkeypress = function(event)
|
||||
document.onkeypress = function(event)
|
||||
{
|
||||
/// Firefox has code 13 for Enter and Chromium has code 10.
|
||||
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
|
||||
@ -372,6 +443,9 @@
|
||||
document.getElementById('error').style.display = 'none';
|
||||
|
||||
document.getElementById('stats').innerText = '';
|
||||
|
||||
document.getElementById('hourglass').style.display = 'none';
|
||||
document.getElementById('check-mark').style.display = 'none';
|
||||
}
|
||||
|
||||
function renderResult(response)
|
||||
@ -443,7 +517,7 @@
|
||||
function renderError(response)
|
||||
{
|
||||
clear();
|
||||
document.getElementById('error').innerText = response;
|
||||
document.getElementById('error').innerText = response ? response : "No response.";
|
||||
document.getElementById('error').style.display = 'block';
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
PROGRAM(clickhouse-server)
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
PROGRAM(clickhouse)
|
||||
|
||||
CFLAGS(
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -80,7 +80,7 @@ public:
|
||||
}
|
||||
|
||||
if (!isUnsignedInteger(arguments[1]))
|
||||
throw Exception("Second argument of aggregate function " + getName() + " must be integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception("Second argument of aggregate function " + getName() + " must be unsigned integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (default_value.isNull())
|
||||
default_value = type->getDefault();
|
||||
|
@ -143,7 +143,7 @@ void LinearModelData::updateState()
|
||||
|
||||
void LinearModelData::predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const Context & context) const
|
||||
@ -264,8 +264,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
|
||||
average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac;
|
||||
average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac;
|
||||
}
|
||||
beta1_powered_ *= adam_rhs.beta1_powered_;
|
||||
beta2_powered_ *= adam_rhs.beta2_powered_;
|
||||
beta1_powered *= adam_rhs.beta1_powered;
|
||||
beta2_powered *= adam_rhs.beta2_powered;
|
||||
}
|
||||
|
||||
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
|
||||
@ -282,21 +282,21 @@ void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & b
|
||||
for (size_t i = 0; i != average_gradient.size(); ++i)
|
||||
{
|
||||
Float64 normed_gradient = batch_gradient[i] / batch_size;
|
||||
average_gradient[i] = beta1_ * average_gradient[i] + (1 - beta1_) * normed_gradient;
|
||||
average_squared_gradient[i] = beta2_ * average_squared_gradient[i] +
|
||||
(1 - beta2_) * normed_gradient * normed_gradient;
|
||||
average_gradient[i] = beta1 * average_gradient[i] + (1 - beta1) * normed_gradient;
|
||||
average_squared_gradient[i] = beta2 * average_squared_gradient[i] +
|
||||
(1 - beta2) * normed_gradient * normed_gradient;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < weights.size(); ++i)
|
||||
{
|
||||
weights[i] += (learning_rate * average_gradient[i]) /
|
||||
((1 - beta1_powered_) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered_)) + eps_));
|
||||
((1 - beta1_powered) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered)) + eps));
|
||||
}
|
||||
bias += (learning_rate * average_gradient[weights.size()]) /
|
||||
((1 - beta1_powered_) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered_)) + eps_));
|
||||
((1 - beta1_powered) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered)) + eps));
|
||||
|
||||
beta1_powered_ *= beta1_;
|
||||
beta2_powered_ *= beta2_;
|
||||
beta1_powered *= beta1;
|
||||
beta2_powered *= beta2;
|
||||
}
|
||||
|
||||
void Adam::addToBatch(
|
||||
@ -348,7 +348,7 @@ void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64
|
||||
|
||||
for (size_t i = 0; i < batch_gradient.size(); ++i)
|
||||
{
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
}
|
||||
for (size_t i = 0; i < weights.size(); ++i)
|
||||
{
|
||||
@ -375,9 +375,9 @@ void Nesterov::addToBatch(
|
||||
std::vector<Float64> shifted_weights(weights.size());
|
||||
for (size_t i = 0; i != shifted_weights.size(); ++i)
|
||||
{
|
||||
shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_;
|
||||
shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha;
|
||||
}
|
||||
auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_;
|
||||
auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha;
|
||||
|
||||
gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num);
|
||||
}
|
||||
@ -411,7 +411,7 @@ void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64
|
||||
|
||||
for (size_t i = 0; i < batch_gradient.size(); ++i)
|
||||
{
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
}
|
||||
for (size_t i = 0; i < weights.size(); ++i)
|
||||
{
|
||||
@ -448,7 +448,7 @@ void IWeightsUpdater::addToBatch(
|
||||
|
||||
void LogisticRegression::predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -516,7 +516,7 @@ void LogisticRegression::compute(
|
||||
|
||||
void LinearRegression::predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
|
@ -23,7 +23,7 @@ GradientComputer class computes gradient according to its loss function
|
||||
class IGradientComputer
|
||||
{
|
||||
public:
|
||||
IGradientComputer() {}
|
||||
IGradientComputer() = default;
|
||||
|
||||
virtual ~IGradientComputer() = default;
|
||||
|
||||
@ -39,7 +39,7 @@ public:
|
||||
|
||||
virtual void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -51,7 +51,7 @@ public:
|
||||
class LinearRegression : public IGradientComputer
|
||||
{
|
||||
public:
|
||||
LinearRegression() {}
|
||||
LinearRegression() = default;
|
||||
|
||||
void compute(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -64,7 +64,7 @@ public:
|
||||
|
||||
void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -76,7 +76,7 @@ public:
|
||||
class LogisticRegression : public IGradientComputer
|
||||
{
|
||||
public:
|
||||
LogisticRegression() {}
|
||||
LogisticRegression() = default;
|
||||
|
||||
void compute(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -89,7 +89,7 @@ public:
|
||||
|
||||
void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -147,9 +147,9 @@ public:
|
||||
class Momentum : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Momentum() {}
|
||||
Momentum() = default;
|
||||
|
||||
Momentum(Float64 alpha) : alpha_(alpha) {}
|
||||
explicit Momentum(Float64 alpha_) : alpha(alpha_) {}
|
||||
|
||||
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
|
||||
|
||||
@ -160,7 +160,7 @@ public:
|
||||
void read(ReadBuffer & buf) override;
|
||||
|
||||
private:
|
||||
Float64 alpha_{0.1};
|
||||
Float64 alpha{0.1};
|
||||
std::vector<Float64> accumulated_gradient;
|
||||
};
|
||||
|
||||
@ -168,9 +168,9 @@ private:
|
||||
class Nesterov : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Nesterov() {}
|
||||
Nesterov() = default;
|
||||
|
||||
Nesterov(Float64 alpha) : alpha_(alpha) {}
|
||||
explicit Nesterov(Float64 alpha_) : alpha(alpha_) {}
|
||||
|
||||
void addToBatch(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -191,7 +191,7 @@ public:
|
||||
void read(ReadBuffer & buf) override;
|
||||
|
||||
private:
|
||||
const Float64 alpha_ = 0.9;
|
||||
const Float64 alpha = 0.9;
|
||||
std::vector<Float64> accumulated_gradient;
|
||||
};
|
||||
|
||||
@ -201,8 +201,8 @@ class Adam : public IWeightsUpdater
|
||||
public:
|
||||
Adam()
|
||||
{
|
||||
beta1_powered_ = beta1_;
|
||||
beta2_powered_ = beta2_;
|
||||
beta1_powered = beta1;
|
||||
beta2_powered = beta2;
|
||||
}
|
||||
|
||||
void addToBatch(
|
||||
@ -225,11 +225,11 @@ public:
|
||||
|
||||
private:
|
||||
/// beta1 and beta2 hyperparameters have such recommended values
|
||||
const Float64 beta1_ = 0.9;
|
||||
const Float64 beta2_ = 0.999;
|
||||
const Float64 eps_ = 0.000001;
|
||||
Float64 beta1_powered_;
|
||||
Float64 beta2_powered_;
|
||||
const Float64 beta1 = 0.9;
|
||||
const Float64 beta2 = 0.999;
|
||||
const Float64 eps = 0.000001;
|
||||
Float64 beta1_powered;
|
||||
Float64 beta2_powered;
|
||||
|
||||
std::vector<Float64> average_gradient;
|
||||
std::vector<Float64> average_squared_gradient;
|
||||
@ -241,7 +241,7 @@ private:
|
||||
class LinearModelData
|
||||
{
|
||||
public:
|
||||
LinearModelData() {}
|
||||
LinearModelData() = default;
|
||||
|
||||
LinearModelData(
|
||||
Float64 learning_rate_,
|
||||
@ -261,7 +261,7 @@ public:
|
||||
|
||||
void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const Context & context) const;
|
||||
@ -360,7 +360,7 @@ public:
|
||||
void predictValues(
|
||||
ConstAggregateDataPtr place,
|
||||
IColumn & to,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const Context & context) const override
|
||||
|
@ -61,7 +61,7 @@ public:
|
||||
throw Exception("Prediction is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
virtual ~IAggregateFunction() {}
|
||||
virtual ~IAggregateFunction() = default;
|
||||
|
||||
/** Data manipulating functions. */
|
||||
|
||||
@ -114,7 +114,7 @@ public:
|
||||
virtual void predictValues(
|
||||
ConstAggregateDataPtr /* place */,
|
||||
IColumn & /*to*/,
|
||||
ColumnsWithTypeAndName & /*arguments*/,
|
||||
const ColumnsWithTypeAndName & /*arguments*/,
|
||||
size_t /*offset*/,
|
||||
size_t /*limit*/,
|
||||
const Context & /*context*/) const
|
||||
|
@ -14,6 +14,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
|
||||
}
|
||||
|
||||
|
||||
@ -36,10 +37,11 @@ namespace ErrorCodes
|
||||
* uses asin, which slows down the algorithm a bit.
|
||||
*/
|
||||
template <typename T>
|
||||
class TDigest
|
||||
class QuantileTDigest
|
||||
{
|
||||
using Value = Float32;
|
||||
using Count = Float32;
|
||||
using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision, than Count
|
||||
|
||||
/** The centroid stores the weight of points around their mean value
|
||||
*/
|
||||
@ -55,13 +57,6 @@ class TDigest
|
||||
, count(count_)
|
||||
{}
|
||||
|
||||
Centroid & operator+=(const Centroid & other)
|
||||
{
|
||||
count += other.count;
|
||||
mean += other.count * (other.mean - mean) / count;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator<(const Centroid & other) const
|
||||
{
|
||||
return mean < other.mean;
|
||||
@ -71,26 +66,42 @@ class TDigest
|
||||
|
||||
/** :param epsilon: value \delta from the article - error in the range
|
||||
* quantile 0.5 (default is 0.01, i.e. 1%)
|
||||
* if you change epsilon, you must also change max_centroids
|
||||
* :param max_centroids: depends on epsilon, the better accuracy, the more centroids you need
|
||||
* to describe data with this accuracy. Read article before changing.
|
||||
* :param max_unmerged: when accumulating count of new points beyond this
|
||||
* value centroid compression is triggered
|
||||
* (default is 2048, the higher the value - the
|
||||
* more memory is required, but amortization of execution time increases)
|
||||
* Change freely anytime.
|
||||
*/
|
||||
struct Params
|
||||
{
|
||||
Value epsilon = 0.01;
|
||||
size_t max_centroids = 2048;
|
||||
size_t max_unmerged = 2048;
|
||||
};
|
||||
/** max_centroids_deserialize should be >= all max_centroids ever used in production.
|
||||
* This is security parameter, preventing allocation of too much centroids in deserialize, so can be relatively large.
|
||||
*/
|
||||
static constexpr size_t max_centroids_deserialize = 65536;
|
||||
|
||||
Params params;
|
||||
static constexpr Params params{};
|
||||
|
||||
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
|
||||
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
|
||||
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(BetterFloat) - sizeof(size_t); // If alignment is imperfect, sizeof(TDigest) will be more than naively expected
|
||||
using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
|
||||
|
||||
Centroids centroids;
|
||||
Count count = 0;
|
||||
UInt32 unmerged = 0;
|
||||
BetterFloat count = 0;
|
||||
size_t unmerged = 0;
|
||||
|
||||
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
|
||||
*/
|
||||
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
|
||||
{
|
||||
double k = (x - x1) / (x2 - x1);
|
||||
return y1 + k * (y2 - y1);
|
||||
}
|
||||
|
||||
struct RadixSortTraits
|
||||
{
|
||||
@ -111,15 +122,56 @@ class TDigest
|
||||
};
|
||||
|
||||
/** Adds a centroid `c` to the digest
|
||||
* centroid must be valid, validity is checked in add(), deserialize() and is maintained by compress()
|
||||
*/
|
||||
void addCentroid(const Centroid & c)
|
||||
{
|
||||
centroids.push_back(c);
|
||||
count += c.count;
|
||||
++unmerged;
|
||||
if (unmerged >= params.max_unmerged)
|
||||
if (unmerged > params.max_unmerged)
|
||||
compress();
|
||||
}
|
||||
void compressBrute()
|
||||
{
|
||||
if (centroids.size() <= params.max_centroids)
|
||||
return;
|
||||
const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2
|
||||
|
||||
auto l = centroids.begin();
|
||||
auto r = std::next(l);
|
||||
BetterFloat sum = 0;
|
||||
BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability
|
||||
BetterFloat l_count = l->count;
|
||||
size_t batch_pos = 0;
|
||||
for (;r != centroids.end(); ++r)
|
||||
{
|
||||
if (batch_pos < batch_size - 1)
|
||||
{
|
||||
/// The left column "eats" the right. Middle of the batch
|
||||
l_count += r->count;
|
||||
l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower
|
||||
l->mean = l_mean;
|
||||
l->count = l_count;
|
||||
batch_pos += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// End of the batch, start the next one
|
||||
sum += l->count; // Not l_count, otherwise actual sum of elements will be different
|
||||
++l;
|
||||
|
||||
/// We skip all the values "eaten" earlier.
|
||||
*l = *r;
|
||||
l_mean = l->mean;
|
||||
l_count = l->count;
|
||||
batch_pos = 0;
|
||||
}
|
||||
}
|
||||
count = sum + l_count; // Update count, it might be different due to += inaccuracy
|
||||
centroids.resize(l - centroids.begin() + 1);
|
||||
// Here centroids.size() <= params.max_centroids
|
||||
}
|
||||
|
||||
public:
|
||||
/** Performs compression of accumulated centroids
|
||||
@ -128,74 +180,92 @@ public:
|
||||
*/
|
||||
void compress()
|
||||
{
|
||||
if (unmerged > 0)
|
||||
if (unmerged > 0 || centroids.size() > params.max_centroids)
|
||||
{
|
||||
// unmerged > 0 implies centroids.size() > 0, hence *l is valid below
|
||||
RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size());
|
||||
|
||||
if (centroids.size() > 3)
|
||||
/// A pair of consecutive bars of the histogram.
|
||||
auto l = centroids.begin();
|
||||
auto r = std::next(l);
|
||||
|
||||
const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization
|
||||
BetterFloat sum = 0;
|
||||
BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability
|
||||
BetterFloat l_count = l->count;
|
||||
while (r != centroids.end())
|
||||
{
|
||||
/// A pair of consecutive bars of the histogram.
|
||||
auto l = centroids.begin();
|
||||
auto r = std::next(l);
|
||||
|
||||
Count sum = 0;
|
||||
while (r != centroids.end())
|
||||
if (l->mean == r->mean) // Perfect aggregation (fast). We compare l->mean, not l_mean, to avoid identical elements after compress
|
||||
{
|
||||
// we use quantile which gives us the smallest error
|
||||
|
||||
/// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l.
|
||||
Value ql = (sum + l->count * 0.5) / count;
|
||||
Value err = ql * (1 - ql);
|
||||
|
||||
/// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r.
|
||||
Value qr = (sum + l->count + r->count * 0.5) / count;
|
||||
Value err2 = qr * (1 - qr);
|
||||
|
||||
if (err > err2)
|
||||
err = err2;
|
||||
|
||||
Value k = 4 * count * err * params.epsilon;
|
||||
|
||||
/** The ratio of the weight of the glued column pair to all values is not greater,
|
||||
* than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2),
|
||||
* and at the edges decreases and is approximately equal to the distance to the edge * 4.
|
||||
*/
|
||||
|
||||
if (l->count + r->count <= k)
|
||||
{
|
||||
// it is possible to merge left and right
|
||||
/// The left column "eats" the right.
|
||||
*l += *r;
|
||||
}
|
||||
else
|
||||
{
|
||||
// not enough capacity, check the next pair
|
||||
sum += l->count;
|
||||
++l;
|
||||
|
||||
/// We skip all the values "eaten" earlier.
|
||||
if (l != r)
|
||||
*l = *r;
|
||||
}
|
||||
l_count += r->count;
|
||||
l->count = l_count;
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
// we use quantile which gives us the smallest error
|
||||
|
||||
/// At the end of the loop, all values to the right of l were "eaten".
|
||||
centroids.resize(l - centroids.begin() + 1);
|
||||
/// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l.
|
||||
BetterFloat ql = (sum + l_count * 0.5) / count;
|
||||
BetterFloat err = ql * (1 - ql);
|
||||
|
||||
/// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r.
|
||||
BetterFloat qr = (sum + l_count + r->count * 0.5) / count;
|
||||
BetterFloat err2 = qr * (1 - qr);
|
||||
|
||||
if (err > err2)
|
||||
err = err2;
|
||||
|
||||
BetterFloat k = count_epsilon_4 * err;
|
||||
|
||||
/** The ratio of the weight of the glued column pair to all values is not greater,
|
||||
* than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2),
|
||||
* and at the edges decreases and is approximately equal to the distance to the edge * 4.
|
||||
*/
|
||||
|
||||
if (l_count + r->count <= k)
|
||||
{
|
||||
// it is possible to merge left and right
|
||||
/// The left column "eats" the right.
|
||||
l_count += r->count;
|
||||
l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower
|
||||
l->mean = l_mean;
|
||||
l->count = l_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
// not enough capacity, check the next pair
|
||||
sum += l->count; // Not l_count, otherwise actual sum of elements will be different
|
||||
++l;
|
||||
|
||||
/// We skip all the values "eaten" earlier.
|
||||
if (l != r)
|
||||
*l = *r;
|
||||
l_mean = l->mean;
|
||||
l_count = l->count;
|
||||
}
|
||||
++r;
|
||||
}
|
||||
count = sum + l_count; // Update count, it might be different due to += inaccuracy
|
||||
|
||||
/// At the end of the loop, all values to the right of l were "eaten".
|
||||
centroids.resize(l - centroids.begin() + 1);
|
||||
unmerged = 0;
|
||||
}
|
||||
// Ensures centroids.size() < max_centroids, independent of unprovable floating point blackbox above
|
||||
compressBrute();
|
||||
}
|
||||
|
||||
/** Adds to the digest a change in `x` with a weight of `cnt` (default 1)
|
||||
*/
|
||||
void add(T x, UInt64 cnt = 1)
|
||||
{
|
||||
addCentroid(Centroid(Value(x), Count(cnt)));
|
||||
auto vx = static_cast<Value>(x);
|
||||
if (cnt == 0 || std::isnan(vx))
|
||||
return; // Count 0 breaks compress() assumptions, Nan breaks sort(). We treat them as no sample.
|
||||
addCentroid(Centroid{vx, static_cast<Count>(cnt)});
|
||||
}
|
||||
|
||||
void merge(const TDigest & other)
|
||||
void merge(const QuantileTDigest & other)
|
||||
{
|
||||
for (const auto & c : other.centroids)
|
||||
addCentroid(c);
|
||||
@ -213,89 +283,23 @@ public:
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (size > params.max_unmerged)
|
||||
if (size > max_centroids_deserialize)
|
||||
throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
||||
|
||||
centroids.resize(size);
|
||||
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
|
||||
|
||||
count = 0;
|
||||
for (const auto & c : centroids)
|
||||
count += c.count;
|
||||
}
|
||||
|
||||
Count getCount()
|
||||
{
|
||||
return count;
|
||||
}
|
||||
|
||||
const Centroids & getCentroids() const
|
||||
{
|
||||
return centroids;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
centroids.resize(0);
|
||||
count = 0;
|
||||
unmerged = 0;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class QuantileTDigest
|
||||
{
|
||||
using Value = Float32;
|
||||
using Count = Float32;
|
||||
centroids.resize(size);
|
||||
// From now, TDigest will be in invalid state if exception is thrown.
|
||||
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
|
||||
|
||||
/** We store two t-digests. When an amount of elements in sub_tdigest become more than merge_threshold
|
||||
* we merge sub_tdigest in main_tdigest and reset sub_tdigest. This method is needed to decrease an amount of
|
||||
* centroids in t-digest (experiments show that after merge_threshold the size of t-digest significantly grows,
|
||||
* but merging two big t-digest decreases it).
|
||||
*/
|
||||
TDigest<T> main_tdigest;
|
||||
TDigest<T> sub_tdigest;
|
||||
size_t merge_threshold = 1e7;
|
||||
|
||||
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
|
||||
*/
|
||||
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
|
||||
{
|
||||
double k = (x - x1) / (x2 - x1);
|
||||
return y1 + k * (y2 - y1);
|
||||
}
|
||||
|
||||
void mergeTDigests()
|
||||
{
|
||||
main_tdigest.merge(sub_tdigest);
|
||||
sub_tdigest.reset();
|
||||
}
|
||||
|
||||
public:
|
||||
void add(T x, UInt64 cnt = 1)
|
||||
{
|
||||
if (sub_tdigest.getCount() >= merge_threshold)
|
||||
mergeTDigests();
|
||||
sub_tdigest.add(x, cnt);
|
||||
}
|
||||
|
||||
void merge(const QuantileTDigest & other)
|
||||
{
|
||||
mergeTDigests();
|
||||
main_tdigest.merge(other.main_tdigest);
|
||||
main_tdigest.merge(other.sub_tdigest);
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf)
|
||||
{
|
||||
mergeTDigests();
|
||||
main_tdigest.serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
sub_tdigest.reset();
|
||||
main_tdigest.deserialize(buf);
|
||||
for (const auto & c : centroids)
|
||||
{
|
||||
if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort()
|
||||
throw Exception("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
|
||||
count += c.count;
|
||||
}
|
||||
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
|
||||
}
|
||||
|
||||
/** Calculates the quantile q [0, 1] based on the digest.
|
||||
@ -304,18 +308,15 @@ public:
|
||||
template <typename ResultType>
|
||||
ResultType getImpl(Float64 level)
|
||||
{
|
||||
mergeTDigests();
|
||||
|
||||
auto & centroids = main_tdigest.getCentroids();
|
||||
if (centroids.empty())
|
||||
return std::is_floating_point_v<ResultType> ? NAN : 0;
|
||||
|
||||
main_tdigest.compress();
|
||||
compress();
|
||||
|
||||
if (centroids.size() == 1)
|
||||
return centroids.front().mean;
|
||||
|
||||
Float64 x = level * main_tdigest.getCount();
|
||||
Float64 x = level * count;
|
||||
Float64 prev_x = 0;
|
||||
Count sum = 0;
|
||||
Value prev_mean = centroids.front().mean;
|
||||
@ -343,9 +344,6 @@ public:
|
||||
template <typename ResultType>
|
||||
void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result)
|
||||
{
|
||||
mergeTDigests();
|
||||
|
||||
auto & centroids = main_tdigest.getCentroids();
|
||||
if (centroids.empty())
|
||||
{
|
||||
for (size_t result_num = 0; result_num < size; ++result_num)
|
||||
@ -353,7 +351,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
main_tdigest.compress();
|
||||
compress();
|
||||
|
||||
if (centroids.size() == 1)
|
||||
{
|
||||
@ -362,7 +360,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount();
|
||||
Float64 x = levels[levels_permutation[0]] * count;
|
||||
Float64 prev_x = 0;
|
||||
Count sum = 0;
|
||||
Value prev_mean = centroids.front().mean;
|
||||
@ -380,7 +378,7 @@ public:
|
||||
if (result_num >= size)
|
||||
return;
|
||||
|
||||
x = levels[levels_permutation[result_num]] * main_tdigest.getCount();
|
||||
x = levels[levels_permutation[result_num]] * count;
|
||||
}
|
||||
|
||||
sum += c.count;
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
|
||||
return res;
|
||||
}
|
||||
|
||||
MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const
|
||||
MutableColumnPtr ColumnAggregateFunction::predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const
|
||||
{
|
||||
MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn();
|
||||
res->reserve(data.size());
|
||||
|
@ -119,7 +119,7 @@ public:
|
||||
const char * getFamilyName() const override { return "AggregateFunction"; }
|
||||
TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; }
|
||||
|
||||
MutableColumnPtr predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const;
|
||||
MutableColumnPtr predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const;
|
||||
|
||||
size_t size() const override
|
||||
{
|
||||
|
@ -138,4 +138,12 @@ void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
|
||||
value = intHashCRC32(data_hash, value);
|
||||
}
|
||||
|
||||
void ColumnConst::compareColumn(
|
||||
const IColumn & rhs, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> & compare_results, int, int nan_direction_hint)
|
||||
const
|
||||
{
|
||||
Int8 res = compareAt(1, 1, rhs, nan_direction_hint);
|
||||
std::fill(compare_results.begin(), compare_results.end(), res);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -199,11 +199,7 @@ public:
|
||||
|
||||
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const override
|
||||
{
|
||||
return data->compareColumn(rhs, rhs_row_num, row_indexes,
|
||||
compare_results, direction, nan_direction_hint);
|
||||
}
|
||||
int direction, int nan_direction_hint) const override;
|
||||
|
||||
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
|
@ -511,19 +511,30 @@ void TestKeeper::processingThread()
|
||||
if (expired)
|
||||
break;
|
||||
|
||||
if (info.watch)
|
||||
{
|
||||
auto & watches_type = dynamic_cast<const ListRequest *>(info.request.get())
|
||||
? list_watches
|
||||
: watches;
|
||||
|
||||
watches_type[info.request->getPath()].emplace_back(std::move(info.watch));
|
||||
}
|
||||
|
||||
++zxid;
|
||||
|
||||
info.request->addRootPath(root_path);
|
||||
auto [response, _] = info.request->process(container, zxid);
|
||||
|
||||
if (info.watch)
|
||||
{
|
||||
/// To be compatible with real ZooKeeper we add watch if request was successful (i.e. node exists)
|
||||
/// or if it was exists request which allows to add watches for non existing nodes.
|
||||
if (response->error == Error::ZOK)
|
||||
{
|
||||
auto & watches_type = dynamic_cast<const ListRequest *>(info.request.get())
|
||||
? list_watches
|
||||
: watches;
|
||||
|
||||
watches_type[info.request->getPath()].emplace_back(std::move(info.watch));
|
||||
}
|
||||
else if (response->error == Error::ZNONODE && dynamic_cast<const ExistsRequest *>(info.request.get()))
|
||||
{
|
||||
watches[info.request->getPath()].emplace_back(std::move(info.watch));
|
||||
}
|
||||
}
|
||||
|
||||
if (response->error == Error::ZOK)
|
||||
info.request->processWatches(watches, list_watches);
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL (
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL (
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Core/MySQL/PacketsProtocolText.h>
|
||||
#include <Core/MySQL/PacketsReplication.h>
|
||||
#include <Core/MySQL/MySQLReplication.h>
|
||||
#include <Poco/String.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -132,11 +133,19 @@ void MySQLClient::ping()
|
||||
writeCommand(Command::COM_PING, "");
|
||||
}
|
||||
|
||||
void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str)
|
||||
void MySQLClient::setBinlogChecksum(const String & binlog_checksum)
|
||||
{
|
||||
/// Set binlog checksum to CRC32.
|
||||
String checksum = "CRC32";
|
||||
writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = '" + checksum + "'");
|
||||
replication.setChecksumSignatureLength(Poco::toUpper(binlog_checksum) == "NONE" ? 0 : 4);
|
||||
}
|
||||
|
||||
void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str, const String & binlog_checksum)
|
||||
{
|
||||
/// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments:
|
||||
/// Make a notice to the server that this client is checksum-aware.
|
||||
/// It does not need the first fake Rotate necessary checksummed.
|
||||
writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = 'CRC32'");
|
||||
|
||||
setBinlogChecksum(binlog_checksum);
|
||||
|
||||
/// Set heartbeat 1s.
|
||||
UInt64 period_ns = (1 * 1e9);
|
||||
|
@ -29,10 +29,12 @@ public:
|
||||
void disconnect();
|
||||
void ping();
|
||||
|
||||
void setBinlogChecksum(const String & binlog_checksum);
|
||||
|
||||
/// Start replication stream by GTID.
|
||||
/// replicate_db: replication database schema, events from other databases will be ignored.
|
||||
/// gtid: executed gtid sets format like 'hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh:x-y'.
|
||||
void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid);
|
||||
void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid, const String & binlog_checksum);
|
||||
|
||||
BinlogEventPtr readOneBinlogEvent(UInt64 milliseconds = 0);
|
||||
Position getPosition() const { return replication.getPosition(); }
|
||||
|
@ -57,7 +57,6 @@ namespace MySQLReplication
|
||||
payload.readStrict(reinterpret_cast<char *>(&create_timestamp), 4);
|
||||
payload.readStrict(reinterpret_cast<char *>(&event_header_length), 1);
|
||||
assert(event_header_length == EVENT_HEADER_LENGTH);
|
||||
|
||||
readStringUntilEOF(event_type_header_length, payload);
|
||||
}
|
||||
|
||||
@ -745,7 +744,7 @@ namespace MySQLReplication
|
||||
// skip the generic response packets header flag.
|
||||
payload.ignore(1);
|
||||
|
||||
MySQLBinlogEventReadBuffer event_payload(payload);
|
||||
MySQLBinlogEventReadBuffer event_payload(payload, checksum_signature_length);
|
||||
|
||||
EventHeader event_header;
|
||||
event_header.parse(event_payload);
|
||||
|
@ -526,6 +526,8 @@ namespace MySQLReplication
|
||||
virtual BinlogEventPtr readOneEvent() = 0;
|
||||
virtual void setReplicateDatabase(String db) = 0;
|
||||
virtual void setGTIDSets(GTIDSets sets) = 0;
|
||||
virtual void setChecksumSignatureLength(size_t checksum_signature_length_) = 0;
|
||||
|
||||
virtual ~IFlavor() override = default;
|
||||
};
|
||||
|
||||
@ -538,12 +540,14 @@ namespace MySQLReplication
|
||||
BinlogEventPtr readOneEvent() override { return event; }
|
||||
void setReplicateDatabase(String db) override { replicate_do_db = std::move(db); }
|
||||
void setGTIDSets(GTIDSets sets) override { position.gtid_sets = std::move(sets); }
|
||||
void setChecksumSignatureLength(size_t checksum_signature_length_) override { checksum_signature_length = checksum_signature_length_; }
|
||||
|
||||
private:
|
||||
Position position;
|
||||
BinlogEventPtr event;
|
||||
String replicate_do_db;
|
||||
std::shared_ptr<TableMapEvent> table_map;
|
||||
size_t checksum_signature_length = 4;
|
||||
|
||||
inline bool do_replicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); }
|
||||
};
|
||||
|
@ -65,6 +65,7 @@ class IColumn;
|
||||
M(UInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \
|
||||
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
|
||||
M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
|
||||
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
|
||||
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
|
||||
M(Bool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \
|
||||
M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \
|
||||
@ -441,6 +442,8 @@ class IColumn;
|
||||
M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
|
||||
\
|
||||
M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
|
||||
M(Bool, output_format_json_named_tuples_as_objects, false, "Serialize named tuple columns as JSON objects.", 0) \
|
||||
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
|
||||
\
|
||||
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
|
||||
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
|
||||
@ -517,7 +520,7 @@ struct Settings : public BaseSettings<SettingsTraits>
|
||||
};
|
||||
|
||||
/*
|
||||
* User-specified file format settings for File and ULR engines.
|
||||
* User-specified file format settings for File and URL engines.
|
||||
*/
|
||||
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
|
||||
|
||||
|
@ -304,7 +304,8 @@ int main(int argc, char ** argv)
|
||||
"user", boost::program_options::value<std::string>()->default_value("root"), "master user")(
|
||||
"password", boost::program_options::value<std::string>()->required(), "master password")(
|
||||
"gtid", boost::program_options::value<std::string>()->default_value(""), "executed GTID sets")(
|
||||
"db", boost::program_options::value<std::string>()->required(), "replicate do db");
|
||||
"db", boost::program_options::value<std::string>()->required(), "replicate do db")(
|
||||
"binlog_checksum", boost::program_options::value<std::string>()->default_value("CRC32"), "master binlog_checksum");
|
||||
|
||||
boost::program_options::variables_map options;
|
||||
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
|
||||
@ -319,6 +320,7 @@ int main(int argc, char ** argv)
|
||||
auto master_password = options.at("password").as<DB::String>();
|
||||
auto gtid_sets = options.at("gtid").as<DB::String>();
|
||||
auto replicate_db = options.at("db").as<DB::String>();
|
||||
auto binlog_checksum = options.at("binlog_checksum").as<String>();
|
||||
|
||||
std::cerr << "Master Host: " << host << ", Port: " << port << ", User: " << master_user << ", Password: " << master_password
|
||||
<< ", Replicate DB: " << replicate_db << ", GTID: " << gtid_sets << std::endl;
|
||||
@ -328,7 +330,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
/// Connect to the master.
|
||||
slave.connect();
|
||||
slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets);
|
||||
slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets, binlog_checksum);
|
||||
|
||||
WriteBufferFromOStream cerr(std::cerr);
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -17,7 +17,7 @@ ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params &
|
||||
// Subtract one thread that we use for segmentation and one for
|
||||
// reading. After that, must have at least two threads left for
|
||||
// parsing. See the assertion below.
|
||||
pool(std::max(2, params.max_threads - 2)),
|
||||
pool(std::max(2, static_cast<int>(params.max_threads) - 2)),
|
||||
file_segmentation_engine(params.file_segmentation_engine)
|
||||
{
|
||||
// See comment above.
|
||||
|
@ -69,7 +69,7 @@ public:
|
||||
const InputProcessorCreator & input_processor_creator;
|
||||
const InputCreatorParams & input_creator_params;
|
||||
FormatFactory::FileSegmentationEngine file_segmentation_engine;
|
||||
int max_threads;
|
||||
size_t max_threads;
|
||||
size_t min_chunk_bytes;
|
||||
};
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user