Merge branch 'master' into ldap-cache-login
* master: (70 commits)
  - Update documentation-issue.md
  - Add an option to use existing tables to perf.py
  - DOCSUP-4280: Update the SELECT query (#17231)
  - DOCSUP-3584 edit and translate (#17176)
  - Fixed flaky test_storage_s3::test_custom_auth_headers
  - Update 01560_merge_distributed_join.sql
  - Minor improvements
  - Slightly more correct
  - Auto version update to [20.13.1.1] [54444]
  - Auto version update to [20.12.1.5236] [54443]
  - Update roadmap
  - Add favicon; add loading indicator
  - Fix race condition; history and sharing capabilities
  - Update bitmap-functions.md
  - Fix exception message
  - Use default value for read-only flag in metadata for Disk3.
  - ISSUES-16605 try fix review comment trigger CI
  - ISSUES-16605 try fix integration failure
  - ISSUES-16605 try fix integration test failure
  - ...
commit a367abb88f
@ -2,8 +2,7 @@
|
||||
name: Documentation issue
|
||||
about: Report something incorrect or missing in documentation
|
||||
title: ''
|
||||
labels: documentation
|
||||
assignees: BayoNet
|
||||
labels: comp-documentation
|
||||
|
||||
---
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept> // for std::logic_error
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
@ -1,9 +1,9 @@
|
||||
# This strings autochanged from release_lib.sh:
|
||||
SET(VERSION_REVISION 54443)
|
||||
SET(VERSION_REVISION 54444)
|
||||
SET(VERSION_MAJOR 20)
|
||||
SET(VERSION_MINOR 12)
|
||||
SET(VERSION_MINOR 13)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH c53725fb1f846fda074347607ab582fbb9c6f7a1)
|
||||
SET(VERSION_DESCRIBE v20.12.1.1-prestable)
|
||||
SET(VERSION_STRING 20.12.1.1)
|
||||
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
|
||||
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
|
||||
SET(VERSION_STRING 20.13.1.1)
|
||||
# end of autochange
|
||||
|
2 contrib/cctz (vendored)
@ -1 +1 @@
|
||||
Subproject commit 7a2db4ece6e0f1b246173cbdb62711ae258ee841
|
||||
Subproject commit 260ba195ef6c489968bae8c88c62a67cdac5ff9d
|
4 debian/changelog (vendored)
@ -1,5 +1,5 @@
|
||||
clickhouse (20.12.1.1) unstable; urgency=low
|
||||
clickhouse (20.13.1.1) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 05 Nov 2020 21:52:47 +0300
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
RUN apt-get update \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build
|
||||
ENV OUTPUT_DIR=/output
|
||||
ENV IGNORE='.*contrib.*'
|
||||
|
||||
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \
|
||||
RUN apt-get update && apt-get install cmake --yes --no-install-recommends
|
||||
|
||||
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \
|
||||
dpkg -i /package_folder/clickhouse-common-static_*.deb; \
|
||||
llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
|
||||
llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
|
||||
llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
|
||||
llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
|
||||
genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR}
|
||||
|
@ -15,6 +15,9 @@ stage=${stage:-}
|
||||
# empty parameter.
|
||||
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
|
||||
|
||||
# Run only matching tests.
|
||||
FASTTEST_FOCUS=${FASTTEST_FOCUS:-""}
|
||||
|
||||
FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}")
|
||||
FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
|
||||
FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
|
||||
@ -291,7 +294,7 @@ TESTS_TO_SKIP=(
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
@ -30,7 +30,7 @@ RUN apt-get update \
|
||||
tzdata \
|
||||
vim \
|
||||
wget \
|
||||
&& pip3 --no-cache-dir install clickhouse_driver scipy \
|
||||
&& pip3 --no-cache-dir install 'clickhouse-driver>=0.1.5' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
&& apt-get clean \
|
||||
|
@ -14,10 +14,12 @@ import string
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import logging
|
||||
import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
@ -46,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
@ -146,20 +150,21 @@ for i, s in enumerate(servers):
|
||||
|
||||
reportStageEnd('connect')
|
||||
|
||||
# Run drop queries, ignoring errors. Do this before all other activity, because
|
||||
# clickhouse_driver disconnects on error (this is not configurable), and the new
|
||||
# connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
pass
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
pass
|
||||
|
||||
reportStageEnd('drop-1')
|
||||
reportStageEnd('drop-1')
|
||||
|
||||
# Apply settings.
|
||||
# If there are errors, report them and continue -- maybe a new test uses a setting
|
||||
@ -171,12 +176,9 @@ reportStageEnd('drop-1')
|
||||
settings = root.findall('settings/*')
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
try:
|
||||
q = f"set {s.tag} = '{s.text}'"
|
||||
c.execute(q)
|
||||
print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
|
||||
c.settings[s.tag] = s.text
|
||||
|
||||
reportStageEnd('settings')
|
||||
|
||||
@ -194,37 +196,40 @@ for t in tables:
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
|
||||
# Run create and fill queries. We will run them simultaneously for both servers,
|
||||
# to save time.
|
||||
# The weird search is to keep the relative order of elements, which matters, and
|
||||
# etree doesn't support the appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
|
||||
# and temporary tables are destroyed. We want to be able to continue after some
|
||||
# errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
sys.exit(1)
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
threads = [Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
threads = [
|
||||
Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.start()
|
||||
|
||||
for t in threads:
|
||||
t.join()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
reportStageEnd('create')
|
||||
reportStageEnd('create')
|
||||
|
||||
# By default, test all queries.
|
||||
queries_to_run = range(0, len(test_queries))
|
||||
@ -403,10 +408,11 @@ print(f'profile-total\t{profile_total_seconds}')
|
||||
reportStageEnd('run')
|
||||
|
||||
# Run drop queries
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
|
@ -1,12 +1,12 @@
|
||||
# docker build -t yandex/clickhouse-stateful-test-with-coverage .
|
||||
FROM yandex/clickhouse-stateless-test
|
||||
FROM yandex/clickhouse-stateless-test-with-coverage
|
||||
|
||||
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
python3-requests
|
||||
python3-requests procps psmisc
|
||||
|
||||
COPY s3downloader /s3downloader
|
||||
COPY run.sh /run.sh
|
||||
|
@ -1,40 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
kill_clickhouse () {
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process"
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive"
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
}
|
||||
|
||||
wait_llvm_profdata () {
|
||||
while kill -0 "$(pgrep llvm-profdata-10)"
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
|
||||
sleep 3
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
merge_client_files_in_background () {
|
||||
client_files=$(ls /client_*profraw 2>/dev/null)
|
||||
if [ -n "$client_files" ]
|
||||
then
|
||||
llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw"
|
||||
rm "$client_files"
|
||||
fi
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
|
||||
@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
function start()
|
||||
{
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
timeout 120 service clickhouse-server start
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
start
|
||||
start_clickhouse
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
if ! /s3downloader --dataset-names $DATASETS; then
|
||||
@ -81,10 +66,6 @@ fi
|
||||
|
||||
chmod 777 -R /var/lib/clickhouse
|
||||
|
||||
while /bin/true; do
|
||||
merge_client_files_in_background
|
||||
sleep 2
|
||||
done &
|
||||
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
@ -93,14 +74,13 @@ LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DA
|
||||
kill_clickhouse
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
@ -113,11 +93,6 @@ LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard -
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
wait_llvm_profdata
|
||||
|
||||
sleep 3
|
||||
|
||||
wait_llvm_profdata # 100% merged all parts
|
||||
|
||||
|
||||
cp /*.profraw /profraw ||:
|
||||
|
@ -1,4 +1,4 @@
|
||||
# docker build -t yandex/clickhouse-stateless-with-coverage-test .
|
||||
# docker build -t yandex/clickhouse-stateless-test-with-coverage .
|
||||
# TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs)
|
||||
FROM yandex/clickhouse-test-base
|
||||
|
||||
@ -28,7 +28,9 @@ RUN apt-get update -y \
|
||||
lsof \
|
||||
unixodbc \
|
||||
wget \
|
||||
qemu-user-static
|
||||
qemu-user-static \
|
||||
procps \
|
||||
psmisc
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
|
@ -2,27 +2,41 @@
|
||||
|
||||
kill_clickhouse () {
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "Will try to send second kill signal for sure"
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
sleep 5
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
@ -1,42 +1,42 @@
|
||||
# ClickHouse obfuscator
|
||||
|
||||
Simple tool for table data obfuscation.
|
||||
|
||||
It reads input table and produces output table, that retain some properties of input, but contains different data.
|
||||
It allows to publish almost real production data for usage in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
|
||||
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, NULLs;
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
|
||||
- date component of DateTime values;
|
||||
- UTF-8 validity of string values;
|
||||
- string values continue to look somewhat natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregation and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
|
||||
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
|
||||
|
||||
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
|
||||
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
|
||||
|
||||
It may retain some data you don't want to publish.
|
||||
|
||||
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
|
||||
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
So, the user will be able to count exact ratio of mobile traffic.
|
||||
|
||||
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
|
||||
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
|
||||
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
|
||||
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
|
||||
|
||||
This tool works fine only with reasonable amount of data (at least 1000s of rows).
|
||||
# ClickHouse obfuscator
|
||||
|
||||
A simple tool for table data obfuscation.
|
||||
|
||||
It reads an input table and produces an output table that retains some properties of the input but contains different data.
It allows publishing almost-real production data for use in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under the condition on the value of another column;
|
||||
- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of the length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, `NULL`s;
|
||||
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
|
||||
- date component of `DateTime` values;
|
||||
|
||||
- UTF-8 validity of string values;
|
||||
- string values look natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregation, and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in a deterministic fashion: you define a seed value, and the transformation is determined by the input data and by the seed.
Some transformations are one-to-one and could be reversed, so you need to have a large seed and keep it secret.
|
||||
|
||||
It uses some cryptographic primitives to transform data, but from a cryptographic point of view it does not do so properly, which is why you should not consider the result secure unless you have another reason to. The result may retain some data you don't want to publish.
|
||||
|
||||
|
||||
It always leaves the numbers 0, 1, and -1, as well as dates, lengths of arrays, and null flags, exactly as in the source data.
For example, if you have a column `IsMobile` in your table with values 0 and 1, then in the transformed data it will have the same values.
|
||||
|
||||
So, the user will be able to count the exact ratio of mobile traffic.
|
||||
|
||||
Here is another example: suppose your table contains private data, such as user emails, and you do not want to publish any single email address.
If your table is large enough, contains many different emails, and no email occurs with a much higher frequency than all the others, the tool will anonymize all the data. But if you have a small number of different values in a column, it can reproduce some of them.
In that case, you should review how the tool's algorithm works and fine-tune its command-line parameters.
|
||||
|
||||
This tool works well only with a reasonable amount of data (at least thousands of rows).
|
||||
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
|
||||
|
||||
Now you can use the `groupArray` function to create an array from the `y` column:
|
||||
|
||||
``` sql
|
||||
|
@ -0,0 +1,37 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Initializes aggregation for your input rows. It is intended for functions with the `State` suffix.
Use it for tests or to process columns of the `AggregateFunction` type and `AggregatingMergeTree` tables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `aggregate_function` — Name of the aggregate function whose state should be created. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — The column that is passed to the function as an argument. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
Returns the result of the aggregation for your input rows. The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
For example, for functions with the `State` suffix, the return type is `AggregateFunction`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Result:
|
||||
|
||||
┌─uniqMerge(state)─┐
|
||||
│ 3 │
|
||||
└──────────────────┘
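For illustration only (not part of the original patch), a minimal sketch of feeding `initializeAggregation` states into an `AggregatingMergeTree` table; the table and column names here are hypothetical:

```sql
-- Hypothetical table; each inserted row stores a single-value uniq state.
CREATE TABLE agg_example
(
    k UInt32,
    u AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY k;

INSERT INTO agg_example
SELECT number % 10 AS k, initializeAggregation('uniqState', number) AS u
FROM numbers(1000);

-- Merge the stored states back into final values.
SELECT k, uniqMerge(u) FROM agg_example GROUP BY k ORDER BY k;
```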
|
@ -535,18 +535,7 @@ dateDiff('unit', startdate, enddate, [timezone])
|
||||
|
||||
- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
|
||||
Supported values:
|
||||
|
||||
| unit |
|
||||
| ---- |
|
||||
|second |
|
||||
|minute |
|
||||
|hour |
|
||||
|day |
|
||||
|week |
|
||||
|month |
|
||||
|quarter |
|
||||
|year |
|
||||
Supported values: second, minute, hour, day, week, month, quarter, year.
|
||||
|
||||
- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
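For reference, a minimal usage sketch of `dateDiff` with one of the supported units (illustration only; the literal timestamps are arbitrary):

```sql
-- Difference in hours between two DateTime values (here, 25).
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')) AS hours_between;
```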
|
||||
|
||||
|
@ -6,7 +6,7 @@ toc_title: GROUP BY
|
||||
|
||||
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
|
||||
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”.
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
|
||||
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
|
||||
- The result of an aggregating `SELECT` query will contain as many rows as there were unique values of the “grouping key” in the source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: the row count stays the same if all “grouping key” values were distinct.
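To illustrate the rules above, a minimal sketch (illustration only, not part of the original patch) where every selected column is either a key expression or wrapped in an aggregate function:

```sql
SELECT
    number % 3 AS k,      -- key expression
    count() AS c,         -- aggregate function
    max(number) AS max_n  -- aggregate over a non-key column
FROM numbers(10)
GROUP BY k;
```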
|
||||
|
||||
@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v
|
||||
|
||||
If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value.
|
||||
|
||||
## WITH ROLLUP Modifier {#with-rollup-modifier}
|
||||
|
||||
The `WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotal rows are added after the result table.

The subtotals are calculated in reverse order: first for the last key expression in the list, then for the previous one, and so on, up to the first key expression.

In the subtotal rows, the values of the already “grouped” key expressions are set to `0` or an empty string.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
Since the `GROUP BY` clause has three key expressions, the result contains four tables with subtotals “rolled up” from right to left:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (and `day` column is filled with zeros);
|
||||
- `GROUP BY year` (now `month, day` columns are both filled with zeros);
|
||||
- and totals (and all three key expression columns are zeros).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## WITH CUBE Modifier {#with-cube-modifier}
|
||||
|
||||
The `WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotal rows are added after the result table.

In the subtotal rows, the values of all “grouped” key expressions are set to `0` or an empty string.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
Since the `GROUP BY` clause has three key expressions, the result contains eight tables with subtotals for all key expression combinations:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- and totals.
|
||||
|
||||
Columns excluded from `GROUP BY` are filled with zeros.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## WITH TOTALS Modifier {#with-totals-modifier}
|
||||
|
||||
If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty strings), and columns of aggregate functions with the values calculated across all the rows (the “total” values).
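A minimal sketch (illustration only, not part of the original patch) showing the extra “totals” row:

```sql
-- The final totals row aggregates over all input rows; its key column holds the default value 0.
SELECT number % 3 AS k, count() AS c
FROM numbers(10)
GROUP BY k WITH TOTALS;
```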
|
||||
@ -88,8 +236,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
However, in contrast to standard SQL, if the table doesn’t have any rows (either there aren’t any at all, or there aren’t any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions.
|
||||
|
||||
As opposed to MySQL (and conforming to standard SQL), you can’t get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the ‘any’ aggregate function (get the first encountered value) or ‘min/max’.
|
||||
|
||||
Example:
|
||||
@ -105,10 +251,6 @@ GROUP BY domain
|
||||
|
||||
For every different key value encountered, `GROUP BY` calculates a set of aggregate function values.
|
||||
|
||||
`GROUP BY` is not supported for array columns.
|
||||
|
||||
A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`.
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.
|
||||
|
@ -20,7 +20,7 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
|
||||
|
||||
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
|
||||
## SELECT modifiers {#select-modifiers}
|
||||
|
||||
You can use the following modifiers in `SELECT` queries.
|
||||
|
||||
### APPLY {#apply-modifier}
|
||||
|
||||
Allows you to invoke some function for each row returned by an outer table expression of a query.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
|
||||
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
|
||||
SELECT * APPLY(sum) FROM columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
|
||||
│ 220 │ 18 │ 347 │
|
||||
└────────┴────────┴────────┘
|
||||
```
|
||||
|
||||
### EXCEPT {#except-modifier}
|
||||
|
||||
Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * EXCEPT (i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌──j─┬───k─┐
|
||||
│ 10 │ 324 │
|
||||
│ 8 │ 23 │
|
||||
└────┴─────┘
|
||||
```
|
||||
|
||||
### REPLACE {#replace-modifier}
|
||||
|
||||
Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
|
||||
|
||||
This modifier does not change the names or order of columns. However, it can change the value and the value type.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌───i─┬──j─┬───k─┐
|
||||
│ 101 │ 10 │ 324 │
|
||||
│ 121 │ 8 │ 23 │
|
||||
└─────┴────┴─────┘
|
||||
```
|
||||
|
||||
### Modifier Combinations {#modifier-combinations}
|
||||
|
||||
You can use each modifier separately or combine them.
|
||||
|
||||
**Examples:**
|
||||
|
||||
Using the same modifier multiple times.
|
||||
|
||||
``` sql
|
||||
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
|
||||
│ 2 │ 3 │
|
||||
└──────────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
Using multiple modifiers in a single query.
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(plus(i, 1))─┬─sum(k)─┐
|
||||
│ 222 │ 347 │
|
||||
└─────────────────┴────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
|
||||
<!--hide-->
|
||||
|
43 docs/ru/operations/utilities/clickhouse-obfuscator.md (new file)
@ -0,0 +1,43 @@
|
||||
# Обфускатор ClickHouse
|
||||
|
||||
Простой инструмент для обфускации табличных данных.
|
||||
|
||||
Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные.
|
||||
|
||||
Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность.
|
||||
|
||||
Обфускатор предназначен для сохранения следующих свойств данных:
|
||||
- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов;
|
||||
- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца;
|
||||
- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой;
|
||||
- вероятностное распределение длины строк;
|
||||
- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`;
|
||||
- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков;
|
||||
|
||||
- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой;
|
||||
- дату из значений `DateTime`;
|
||||
|
||||
- кодировка UTF-8 значений строки;
|
||||
- строковые значения выглядят естественным образом.
|
||||
|
||||
|
||||
Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д.
|
||||
|
||||
Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором.
|
||||
|
||||
Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете.
|
||||
|
||||
|
||||
Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать.
|
||||
|
||||
|
||||
Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги.
|
||||
Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение.
|
||||
|
||||
Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика.
|
||||
|
||||
Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать.
|
||||
Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них.
|
||||
В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки.
|
||||
|
||||
Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк).
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`.
|
||||
|
||||
Теперь с помощью функции `groupArray` сформируем массив из столбца `y`:
|
||||
|
||||
``` sql
|
||||
|
@ -0,0 +1,40 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Инициализирует агрегацию для введеных строчек. Предназначена для функций с суффиксом `State`.
|
||||
Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregatingMergeTree`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string).
|
||||
- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAggregation`.
|
||||
|
||||
Пример:
|
||||
|
||||
Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Результат:
|
||||
|
||||
┌─uniqMerge(state)─┐
|
||||
│ 3 │
|
||||
└──────────────────┘
|
@ -43,6 +43,153 @@ toc_title: GROUP BY
|
||||
|
||||
Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением.
|
||||
|
||||
## Модификатор WITH ROLLUP {#with-rollup-modifier}
|
||||
|
||||
Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (а колонка `day` заполнена нулями);
|
||||
- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями);
|
||||
- и общий итог (все три колонки с ключевыми выражениями заполнены нулями).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## Модификатор WITH CUBE {#with-cube-modifier}
|
||||
|
||||
Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- и общий итог.
|
||||
|
||||
Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## Модификатор WITH TOTALS {#with-totals-modifier}
|
||||
|
||||
Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения).
|
||||
@ -86,8 +233,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций.
|
||||
|
||||
В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max.
|
||||
|
||||
Пример:
|
||||
@ -103,10 +248,6 @@ GROUP BY domain
|
||||
|
||||
GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций.
|
||||
|
||||
Не поддерживается GROUP BY по столбцам-массивам.
|
||||
|
||||
Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`.
|
||||
|
||||
## Детали реализации {#implementation-details}
|
||||
|
||||
Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки.
|
||||
|
@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
|
@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A"
|
||||
|
||||
ClickHouse体验还有如下:
|
||||
[ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse)
|
||||
实例托管 [Yandex云](https://cloud.yandex.com/).
|
||||
更多信息 [云提供商](../commercial/cloud.md).
|
||||
实例托管 [Yandex云](https://cloud.yandex.com/)。
|
||||
更多信息 [云提供商](../commercial/cloud.md)。
|
||||
|
||||
ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的.
|
||||
体验平台后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。
|
||||
体验平台也同样提供了ClickHouse HTTPS服务端口。
|
||||
|
||||
您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机
|
||||
有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md).
|
||||
您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。
|
||||
有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。
|
||||
|
||||
| 参数 | 值 |
|
||||
|:---------|:--------------------------------------|
|
||||
|
@ -33,10 +33,10 @@ ClickHouse 收集的指标项:
- 服务用于计算的资源占用的各种指标。
- 关于查询处理的常见统计信息。
可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) ,[系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。
可以在[系统指标](system-tables/metrics.md#system_tables-metrics),[系统事件](system-tables/events.md#system_tables-events)以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)等系统表查看所有的指标项。
可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。
可以配置ClickHouse向[Graphite](https://github.com/graphite-project)推送监控信息并导入指标。参考[Graphite监控](server-configuration-parameters/settings.md#server_configuration_parameters-graphite)配置文件。在配置指标导出之前,需要参考[Graphite官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建Graphite服务。
此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。
此外,您可以通过HTTP API监视服务器可用性。将HTTP GET请求发送到`/ping`。如果服务器可用,它将以 `200 OK` 响应。
要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。
要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回`200 OK`。 如果副本滞后,请求将返回`503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。
@ -6,7 +6,7 @@
我们使用RoaringBitmap实际存储位图对象,当基数小于或等于32时,它使用Set保存。当基数大于32时,它使用RoaringBitmap保存。这也是为什么低基数集的存储更快的原因。
有关RoaringBitmap的更多信息,请参阅:[呻吟声](https://github.com/RoaringBitmap/CRoaring)。
有关RoaringBitmap的更多信息,请参阅:[RoaringBitmap](https://github.com/RoaringBitmap/CRoaring)。
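To make the small-set optimization above concrete, here is a minimal C++ sketch under stated assumptions: the 32-element threshold comes from the text, but the `SmallSetOrBitmap` class is hypothetical and `std::unordered_set` merely stands in for the CRoaring containers used in the real implementation.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

// Hybrid container: a small array while cardinality <= 32, then a general
// set as a stand-in for a compressed bitmap (only the switching idea is shown).
class SmallSetOrBitmap
{
public:
    void insert(uint32_t value)
    {
        if (!use_large)
        {
            if (std::find(small.begin(), small.end(), value) != small.end())
                return;
            small.push_back(value);
            if (small.size() > 32)  // threshold taken from the documentation above
            {
                large.insert(small.begin(), small.end());
                small.clear();
                use_large = true;
            }
            return;
        }
        large.insert(value);
    }

    std::size_t cardinality() const { return use_large ? large.size() : small.size(); }

private:
    bool use_large = false;
    std::vector<uint32_t> small;          // cheap storage for low cardinality
    std::unordered_set<uint32_t> large;   // stand-in for a roaring bitmap
};

int main()
{
    SmallSetOrBitmap set;
    for (uint32_t i = 0; i < 100; ++i)
        set.insert(i % 50);
    std::cout << set.cardinality() << '\n';  // prints 50
    return 0;
}
```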
## bitmapBuild {#bitmapbuild}
1
programs/server/config.d/logging_no_rotate.xml
Symbolic link
@ -0,0 +1 @@
../../../tests/config/config.d/logging_no_rotate.xml
@ -11,6 +11,9 @@
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<!-- Rotation policy
|
||||
See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
|
||||
-->
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
|
||||
|
@ -1,6 +1,7 @@
|
||||
<html> <!-- TODO If I write DOCTYPE HTML something changes but I don't know what. -->
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<link rel="icon" href="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI1NCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDkgOCI+PHN0eWxlPi5ve2ZpbGw6I2ZjMH0ucntmaWxsOnJlZH08L3N0eWxlPjxwYXRoIGQ9Ik0wLDcgaDEgdjEgaC0xIHoiIGNsYXNzPSJyIi8+PHBhdGggZD0iTTAsMCBoMSB2NyBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNMiwwIGgxIHY4IGgtMSB6IiBjbGFzcz0ibyIvPjxwYXRoIGQ9Ik00LDAgaDEgdjggaC0xIHoiIGNsYXNzPSJvIi8+PHBhdGggZD0iTTYsMCBoMSB2OCBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNOCwzLjI1IGgxIHYxLjUgaC0xIHoiIGNsYXNzPSJvIi8+PC9zdmc+">
|
||||
<title>ClickHouse Query</title>
|
||||
|
||||
<!-- Code Style:
|
||||
@ -21,26 +22,11 @@
|
||||
|
||||
<!-- Development Roadmap:
|
||||
|
||||
1. Add indication that the query was sent and when the query has been finished.
|
||||
Do not use any animated spinners. Just a text or check mark.
|
||||
Eliminate race conditions (results from the previous query should be ignored on arrival, the previous request should be cancelled).
|
||||
|
||||
2. Support readonly servers.
|
||||
1. Support readonly servers.
|
||||
Check if readonly = 1 (with SELECT FROM system.settings) to avoid sending settings. It can be done once on address/credentials change.
|
||||
It can be done in background, e.g. wait 100 ms after address/credentials change and do the check.
|
||||
Also it can provide visual indication that credentials are correct.
|
||||
|
||||
3. Add history in localstorage. Integrate with history API.
|
||||
There can be a counter in localstorage, that will be appended to location #fragment.
|
||||
The 'back', 'forward' buttons in browser should work.
|
||||
Also there should be UI element to list all the queries from history and select from the list.
|
||||
|
||||
4. Trivial sharing capabilities.
|
||||
Sharing is only possible when system.query_log is accessible. Read the X-ClickHouse-QueryId from the response.
|
||||
Share button will: - emit SYSTEM FLUSH LOGS if not readonly; - find the query in the query_log;
|
||||
- generate an URL with the query id and: server address if not equal to the URL's host; user name if not default;
|
||||
indication that password should be entered in case of non-empty password.
|
||||
|
||||
-->
|
||||
|
||||
<style type="text/css">
|
||||
@ -273,6 +259,22 @@
|
||||
{
|
||||
color: var(--null-color);
|
||||
}
|
||||
|
||||
#hourglass
|
||||
{
|
||||
display: none;
|
||||
padding-left: 1rem;
|
||||
font-size: 110%;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
#check-mark
|
||||
{
|
||||
display: none;
|
||||
padding-left: 1rem;
|
||||
font-size: 110%;
|
||||
color: #080;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
@ -286,6 +288,8 @@
|
||||
<div id="run_div">
|
||||
<button class="shadow" id="run">Run</button>
|
||||
<span class="hint"> (Ctrl+Enter)</span>
|
||||
<span id="hourglass">⧗</span>
|
||||
<span id="check-mark">✔</span>
|
||||
<span id="stats"></span>
|
||||
<span id="toggle-dark">🌑</span><span id="toggle-light">🌞</span>
|
||||
</div>
|
||||
@ -299,50 +303,117 @@
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
/// Incremental request number. When response is received,
|
||||
/// if it's request number does not equal to the current request number, response will be ignored.
|
||||
/// This is to avoid race conditions.
|
||||
var request_num = 0;
|
||||
|
||||
/// Save query in history only if it is different.
|
||||
var previous_query = '';
|
||||
|
||||
/// Substitute the address of the server where the page is served.
|
||||
if (location.protocol != 'file:') {
|
||||
document.getElementById('url').value = location.origin;
|
||||
}
|
||||
|
||||
function post()
|
||||
/// Substitute user name if it's specified in the query string
|
||||
var user_from_url = (new URL(window.location)).searchParams.get('user');
|
||||
if (user_from_url) {
|
||||
document.getElementById('user').value = user_from_url;
|
||||
}
|
||||
|
||||
function postImpl(posted_request_num, query)
|
||||
{
|
||||
/// TODO: Avoid race condition on subsequent requests when responses may come out of order.
|
||||
/// TODO: Check if URL already contains query string (append parameters).
|
||||
|
||||
var user = document.getElementById('user').value;
|
||||
var password = document.getElementById('password').value;
|
||||
|
||||
var url = document.getElementById('url').value +
|
||||
/// Ask server to allow cross-domain requests.
|
||||
'?add_http_cors_header=1' +
|
||||
'&user=' + encodeURIComponent(document.getElementById('user').value) +
|
||||
'&password=' + encodeURIComponent(document.getElementById('password').value) +
|
||||
'&user=' + encodeURIComponent(user) +
|
||||
'&password=' + encodeURIComponent(password) +
|
||||
'&default_format=JSONCompact' +
|
||||
/// Safety settings to prevent results that browser cannot display.
|
||||
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
|
||||
|
||||
var query = document.getElementById('query').value;
|
||||
var xhr = new XMLHttpRequest;
|
||||
|
||||
xhr.open('POST', url, true);
|
||||
xhr.send(query);
|
||||
|
||||
xhr.onreadystatechange = function()
|
||||
{
|
||||
if (this.readyState === XMLHttpRequest.DONE) {
|
||||
if (this.status === 200) {
|
||||
var json;
|
||||
try { json = JSON.parse(this.response); } catch (e) {}
|
||||
if (json !== undefined && json.statistics !== undefined) {
|
||||
renderResult(json);
|
||||
} else {
|
||||
renderUnparsedResult(this.response);
|
||||
}
|
||||
} else {
|
||||
/// TODO: Proper rendering of network errors.
|
||||
renderError(this.response);
|
||||
if (posted_request_num != request_num) {
|
||||
return;
|
||||
} else if (this.readyState === XMLHttpRequest.DONE) {
|
||||
renderResponse(this.status, this.response);
|
||||
|
||||
/// The query is saved in browser history (in state JSON object)
|
||||
/// as well as in URL fragment identifier.
|
||||
if (query != previous_query) {
|
||||
previous_query = query;
|
||||
var title = "ClickHouse Query: " + query;
|
||||
history.pushState(
|
||||
{
|
||||
query: query,
|
||||
status: this.status,
|
||||
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
|
||||
},
|
||||
title,
|
||||
window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query));
|
||||
document.title = title;
|
||||
}
|
||||
} else {
|
||||
//console.log(this);
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('check-mark').style.display = 'none';
|
||||
document.getElementById('hourglass').style.display = 'inline';
|
||||
|
||||
xhr.send(query);
|
||||
}
|
||||
|
||||
function renderResponse(status, response) {
|
||||
document.getElementById('hourglass').style.display = 'none';
|
||||
|
||||
if (status === 200) {
|
||||
var json;
|
||||
try { json = JSON.parse(response); } catch (e) {}
|
||||
if (json !== undefined && json.statistics !== undefined) {
|
||||
renderResult(json);
|
||||
} else {
|
||||
renderUnparsedResult(response);
|
||||
}
|
||||
document.getElementById('check-mark').style.display = 'inline';
|
||||
} else {
|
||||
/// TODO: Proper rendering of network errors.
|
||||
renderError(response);
|
||||
}
|
||||
}
|
||||
|
||||
window.onpopstate = function(event) {
|
||||
if (!event.state) {
|
||||
return;
|
||||
}
|
||||
document.getElementById('query').value = event.state.query;
|
||||
if (!event.state.response) {
|
||||
clear();
|
||||
return;
|
||||
}
|
||||
renderResponse(event.state.status, event.state.response);
|
||||
};
|
||||
|
||||
if (window.location.hash) {
|
||||
document.getElementById('query').value = window.atob(window.location.hash.substr(1));
|
||||
}
|
||||
|
||||
function post()
|
||||
{
|
||||
++request_num;
|
||||
var query = document.getElementById('query').value;
|
||||
postImpl(request_num, query);
|
||||
}
|
||||
|
||||
document.getElementById('run').onclick = function()
|
||||
@ -350,7 +421,7 @@
|
||||
post();
|
||||
}
|
||||
|
||||
document.getElementById('query').onkeypress = function(event)
|
||||
document.onkeypress = function(event)
|
||||
{
|
||||
/// Firefox has code 13 for Enter and Chromium has code 10.
|
||||
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
|
||||
@ -372,6 +443,9 @@
|
||||
document.getElementById('error').style.display = 'none';
|
||||
|
||||
document.getElementById('stats').innerText = '';
|
||||
|
||||
document.getElementById('hourglass').style.display = 'none';
|
||||
document.getElementById('check-mark').style.display = 'none';
|
||||
}
|
||||
|
||||
function renderResult(response)
|
||||
@ -443,7 +517,7 @@
|
||||
function renderError(response)
|
||||
{
|
||||
clear();
|
||||
document.getElementById('error').innerText = response;
|
||||
document.getElementById('error').innerText = response ? response : "No response.";
|
||||
document.getElementById('error').style.display = 'block';
|
||||
}
|
||||
|
||||
|
@ -80,7 +80,7 @@ public:
|
||||
}
|
||||
|
||||
if (!isUnsignedInteger(arguments[1]))
|
||||
throw Exception("Second argument of aggregate function " + getName() + " must be integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception("Second argument of aggregate function " + getName() + " must be unsigned integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (default_value.isNull())
|
||||
default_value = type->getDefault();
|
||||
|
@ -44,10 +44,10 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
DatabaseDictionary::DatabaseDictionary(const String & name_, const Context & global_context_)
|
||||
DatabaseDictionary::DatabaseDictionary(const String & name_, const Context & context_)
|
||||
: IDatabase(name_)
|
||||
, log(&Poco::Logger::get("DatabaseDictionary(" + database_name + ")"))
|
||||
, global_context(global_context_.getGlobalContext())
|
||||
, global_context(context_.getGlobalContext())
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@ namespace DB
|
||||
class DatabaseDictionary final : public IDatabase
|
||||
{
|
||||
public:
|
||||
DatabaseDictionary(const String & name_, const Context & global_context);
|
||||
DatabaseDictionary(const String & name_, const Context & context_);
|
||||
|
||||
String getEngineName() const override
|
||||
{
|
||||
|
@ -13,6 +13,7 @@
|
||||
# include <Databases/MySQL/FetchTablesColumnsList.h>
|
||||
# include <Formats/MySQLBlockInputStream.h>
|
||||
# include <IO/Operators.h>
|
||||
# include <Interpreters/Context.h>
|
||||
# include <Parsers/ASTCreateQuery.h>
|
||||
# include <Parsers/ASTFunction.h>
|
||||
# include <Parsers/ParserCreateQuery.h>
|
||||
|
@ -120,7 +120,7 @@ XDBCDictionarySource::XDBCDictionarySource(
|
||||
, invalidate_query{config_.getString(config_prefix_ + ".invalidate_query", "")}
|
||||
, bridge_helper{bridge_}
|
||||
, timeouts{ConnectionTimeouts::getHTTPTimeouts(context_)}
|
||||
, global_context(context_)
|
||||
, global_context(context_.getGlobalContext())
|
||||
{
|
||||
bridge_url = bridge_helper->getMainURI();
|
||||
|
||||
|
@ -34,6 +34,7 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_FORMAT;
|
||||
extern const int INCORRECT_DISK_INDEX;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int PATH_ACCESS_DENIED;
|
||||
}
|
||||
|
||||
|
||||
@ -93,6 +94,7 @@ namespace
|
||||
/// Metadata file version.
|
||||
static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1;
|
||||
static constexpr UInt32 VERSION_RELATIVE_PATHS = 2;
|
||||
static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3;
|
||||
|
||||
using PathAndSize = std::pair<String, size_t>;
|
||||
|
||||
@ -109,6 +111,8 @@ namespace
|
||||
std::vector<PathAndSize> s3_objects;
|
||||
/// Number of references (hardlinks) to this metadata file.
|
||||
UInt32 ref_count;
|
||||
/// Flag indicates that file is read only.
|
||||
bool read_only = false;
|
||||
|
||||
/// Load metadata by path or create empty if `create` flag is set.
|
||||
explicit Metadata(const String & s3_root_path_, const String & disk_path_, const String & metadata_file_path_, bool create = false)
|
||||
@ -122,10 +126,10 @@ namespace
|
||||
UInt32 version;
|
||||
readIntText(version, buf);
|
||||
|
||||
if (version != VERSION_RELATIVE_PATHS && version != VERSION_ABSOLUTE_PATHS)
|
||||
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG)
|
||||
throw Exception(
|
||||
"Unknown metadata file version. Path: " + disk_path + metadata_file_path
|
||||
+ " Version: " + std::to_string(version) + ", Maximum expected version: " + std::to_string(VERSION_RELATIVE_PATHS),
|
||||
+ " Version: " + std::to_string(version) + ", Maximum expected version: " + std::to_string(VERSION_READ_ONLY_FLAG),
|
||||
ErrorCodes::UNKNOWN_FORMAT);
|
||||
|
||||
assertChar('\n', buf);
|
||||
@ -158,6 +162,12 @@ namespace
|
||||
|
||||
readIntText(ref_count, buf);
|
||||
assertChar('\n', buf);
|
||||
|
||||
if (version >= VERSION_READ_ONLY_FLAG)
|
||||
{
|
||||
readBoolText(read_only, buf);
|
||||
assertChar('\n', buf);
|
||||
}
|
||||
}
|
||||
|
||||
void addObject(const String & path, size_t size)
|
||||
@ -189,6 +199,9 @@ namespace
|
||||
writeIntText(ref_count, buf);
|
||||
writeChar('\n', buf);
|
||||
|
||||
writeBoolText(read_only, buf);
|
||||
writeChar('\n', buf);
|
||||
|
||||
buf.finalize();
|
||||
if (sync)
|
||||
buf.sync();
|
||||
@ -632,6 +645,12 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, si
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t)
|
||||
{
|
||||
bool exist = exists(path);
|
||||
if (exist)
|
||||
{
|
||||
Metadata metadata(s3_root_path, metadata_path, path);
|
||||
if (metadata.read_only)
|
||||
throw Exception("File is read-only: " + path, ErrorCodes::PATH_ACCESS_DENIED);
|
||||
}
|
||||
/// Path to store new S3 object.
|
||||
auto s3_path = getRandomName();
|
||||
bool is_multipart = estimated_size >= min_multi_part_upload_size;
|
||||
@ -797,7 +816,11 @@ void DiskS3::createFile(const String & path)
|
||||
|
||||
void DiskS3::setReadOnly(const String & path)
|
||||
{
|
||||
Poco::File(metadata_path + path).setReadOnly(true);
|
||||
/// We should store the read-only flag inside the metadata file (instead of using an FS flag),
/// because we modify the metadata file when creating hard links from it.
|
||||
Metadata metadata(s3_root_path, metadata_path, path);
|
||||
metadata.read_only = true;
|
||||
metadata.save();
|
||||
}
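For illustration only, a minimal sketch of version-gated metadata parsing in the spirit of the change above; the `ToyMetadata` format and its field names are assumptions, not the actual DiskS3 metadata layout:

```cpp
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <string>

// Hypothetical illustration: version 3 adds a read-only flag; files written
// by older versions simply omit it and keep the default value.
struct ToyMetadata
{
    static constexpr uint32_t VERSION_MIN = 1;
    static constexpr uint32_t VERSION_READ_ONLY_FLAG = 3;

    uint32_t version = VERSION_READ_ONLY_FLAG;
    uint32_t ref_count = 0;
    bool read_only = false;

    void load(const std::string & text)
    {
        std::istringstream in(text);
        in >> version;
        if (version < VERSION_MIN || version > VERSION_READ_ONLY_FLAG)
            throw std::runtime_error("Unknown metadata version " + std::to_string(version));

        in >> ref_count;

        // Only newer files carry the flag; otherwise the default (false) is kept.
        if (version >= VERSION_READ_ONLY_FLAG)
            in >> read_only;
    }

    std::string save() const
    {
        std::ostringstream out;
        out << VERSION_READ_ONLY_FLAG << '\n' << ref_count << '\n' << read_only << '\n';
        return out.str();
    }
};

int main()
{
    ToyMetadata written;
    written.ref_count = 2;
    written.read_only = true;

    ToyMetadata loaded;
    loaded.load(written.save());
    return loaded.read_only ? 0 : 1;  // round-trips the flag
}
```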
|
||||
|
||||
int DiskS3::open(const String & /*path*/, mode_t /*mode*/) const
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Functions/IFunctionAdaptors.h>
|
||||
#include <Functions/FunctionsMiscellaneous.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
@ -153,6 +154,9 @@ struct ConvertImpl
|
||||
{
|
||||
if constexpr (std::is_same_v<FromFieldType, UInt128> || std::is_same_v<ToFieldType, UInt128>)
|
||||
throw Exception("Unexpected UInt128 to big int conversion", ErrorCodes::NOT_IMPLEMENTED);
|
||||
/// If From Data is Nan or Inf, throw exception
|
||||
else if (!isFinite(vec_from[i]))
|
||||
throw Exception("Unexpected inf or nan to big int conversion", ErrorCodes::NOT_IMPLEMENTED);
|
||||
else
|
||||
vec_to[i] = bigint_cast<ToFieldType>(vec_from[i]);
|
||||
}
|
||||
@ -1874,8 +1878,15 @@ class ExecutableFunctionCast : public IExecutableFunctionImpl
|
||||
public:
|
||||
using WrapperType = std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t)>;
|
||||
|
||||
explicit ExecutableFunctionCast(WrapperType && wrapper_function_, const char * name_)
|
||||
: wrapper_function(std::move(wrapper_function_)), name(name_) {}
|
||||
struct Diagnostic
|
||||
{
|
||||
std::string column_from;
|
||||
std::string column_to;
|
||||
};
|
||||
|
||||
explicit ExecutableFunctionCast(
|
||||
WrapperType && wrapper_function_, const char * name_, std::optional<Diagnostic> diagnostic_)
|
||||
: wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
@ -1887,7 +1898,17 @@ protected:
|
||||
if (arguments.size() > 2)
|
||||
new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments));
|
||||
|
||||
return wrapper_function(new_arguments, result_type, nullptr, input_rows_count);
|
||||
try
|
||||
{
|
||||
return wrapper_function(new_arguments, result_type, nullptr, input_rows_count);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (diagnostic)
|
||||
e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) +
|
||||
" to destination column " + backQuoteIfNeed(diagnostic->column_to));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
@ -1898,6 +1919,7 @@ protected:
|
||||
private:
|
||||
WrapperType wrapper_function;
|
||||
const char * name;
|
||||
std::optional<Diagnostic> diagnostic;
|
||||
};
|
||||
|
||||
|
||||
@ -1908,11 +1930,12 @@ class FunctionCast final : public IFunctionBaseImpl
|
||||
public:
|
||||
using WrapperType = std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t)>;
|
||||
using MonotonicityForRange = std::function<Monotonicity(const IDataType &, const Field &, const Field &)>;
|
||||
using Diagnostic = ExecutableFunctionCast::Diagnostic;
|
||||
|
||||
FunctionCast(const char * name_, MonotonicityForRange && monotonicity_for_range_
|
||||
, const DataTypes & argument_types_, const DataTypePtr & return_type_)
|
||||
: name(name_), monotonicity_for_range(monotonicity_for_range_)
|
||||
, argument_types(argument_types_), return_type(return_type_)
|
||||
, const DataTypes & argument_types_, const DataTypePtr & return_type_, std::optional<Diagnostic> diagnostic_)
|
||||
: name(name_), monotonicity_for_range(monotonicity_for_range_)
|
||||
, argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -1921,8 +1944,18 @@ public:
|
||||
|
||||
ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override
|
||||
{
|
||||
return std::make_unique<ExecutableFunctionCast>(
|
||||
prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), name);
|
||||
try
|
||||
{
|
||||
return std::make_unique<ExecutableFunctionCast>(
|
||||
prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), name, diagnostic);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (diagnostic)
|
||||
e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) +
|
||||
" to destination column " + backQuoteIfNeed(diagnostic->column_to));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -1948,6 +1981,8 @@ private:
|
||||
DataTypes argument_types;
|
||||
DataTypePtr return_type;
|
||||
|
||||
std::optional<Diagnostic> diagnostic;
|
||||
|
||||
template <typename DataType>
|
||||
WrapperType createWrapper(const DataTypePtr & from_type, const DataType * const, bool requested_result_is_nullable) const
|
||||
{
|
||||
@ -2558,14 +2593,19 @@ class CastOverloadResolver : public IFunctionOverloadResolverImpl
|
||||
{
|
||||
public:
|
||||
using MonotonicityForRange = FunctionCast::MonotonicityForRange;
|
||||
using Diagnostic = FunctionCast::Diagnostic;
|
||||
|
||||
static constexpr auto name = "CAST";
|
||||
|
||||
static FunctionOverloadResolverImplPtr create(const Context & context);
|
||||
static FunctionOverloadResolverImplPtr createImpl(bool keep_nullable) { return std::make_unique<CastOverloadResolver>(keep_nullable); }
|
||||
static FunctionOverloadResolverImplPtr createImpl(bool keep_nullable, std::optional<Diagnostic> diagnostic = {})
|
||||
{
|
||||
return std::make_unique<CastOverloadResolver>(keep_nullable, std::move(diagnostic));
|
||||
}
|
||||
|
||||
explicit CastOverloadResolver(bool keep_nullable_)
|
||||
: keep_nullable(keep_nullable_)
|
||||
|
||||
explicit CastOverloadResolver(bool keep_nullable_, std::optional<Diagnostic> diagnostic_ = {})
|
||||
: keep_nullable(keep_nullable_), diagnostic(std::move(diagnostic_))
|
||||
{}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -2584,7 +2624,7 @@ protected:
|
||||
data_types[i] = arguments[i].type;
|
||||
|
||||
auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get());
|
||||
return std::make_unique<FunctionCast>(name, std::move(monotonicity), data_types, return_type);
|
||||
return std::make_unique<FunctionCast>(name, std::move(monotonicity), data_types, return_type, diagnostic);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override
|
||||
@ -2612,6 +2652,7 @@ protected:
|
||||
|
||||
private:
|
||||
bool keep_nullable;
|
||||
std::optional<Diagnostic> diagnostic;
|
||||
|
||||
template <typename DataType>
|
||||
static auto monotonicityForType(const DataType * const)
|
||||
|
@ -1,51 +1,9 @@
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/materialize.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
/** materialize(x) - materialize the constant
|
||||
*/
|
||||
class FunctionMaterialize : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "materialize";
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionMaterialize>();
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Get the function name.
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
return arguments[0];
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
return arguments[0].column->convertToFullColumnIfConst();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionMaterialize(FunctionFactory & factory)
|
||||
{
|
||||
|
46
src/Functions/materialize.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** materialize(x) - materialize the constant
|
||||
*/
|
||||
class FunctionMaterialize : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "materialize";
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionMaterialize>();
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Get the function name.
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
return arguments[0];
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
return arguments[0].column->convertToFullColumnIfConst();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -1,7 +1,11 @@
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/IFunctionAdaptors.h>
|
||||
#include <Functions/FunctionsConversion.h>
|
||||
#include <Functions/materialize.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExpressionJIT.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
@ -18,23 +22,26 @@ namespace ErrorCodes
|
||||
extern const int DUPLICATE_COLUMN;
|
||||
extern const int UNKNOWN_IDENTIFIER;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs)
|
||||
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
|
||||
{
|
||||
for (const auto & input : inputs)
|
||||
addInput(input.name, input.type);
|
||||
for (const auto & input : inputs_)
|
||||
addInput(input.name, input.type, true);
|
||||
}
|
||||
|
||||
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs)
|
||||
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
|
||||
{
|
||||
for (const auto & input : inputs)
|
||||
for (const auto & input : inputs_)
|
||||
{
|
||||
if (input.column && isColumnConst(*input.column))
|
||||
addInput(input);
|
||||
addInput(input, true);
|
||||
else
|
||||
addInput(input.name, input.type);
|
||||
addInput(input.name, input.type, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -46,6 +53,9 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
|
||||
|
||||
auto & res = nodes.emplace_back(std::move(node));
|
||||
|
||||
if (res.type == ActionType::INPUT)
|
||||
inputs.emplace_back(&res);
|
||||
|
||||
index.replace(&res);
|
||||
return res;
|
||||
}
|
||||
@ -59,17 +69,17 @@ ActionsDAG::Node & ActionsDAG::getNode(const std::string & name)
|
||||
return **it;
|
||||
}
|
||||
|
||||
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type)
|
||||
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type, bool can_replace)
|
||||
{
|
||||
Node node;
|
||||
node.type = ActionType::INPUT;
|
||||
node.result_type = std::move(type);
|
||||
node.result_name = std::move(name);
|
||||
|
||||
return addNode(std::move(node));
|
||||
return addNode(std::move(node), can_replace);
|
||||
}
|
||||
|
||||
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
|
||||
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool can_replace)
|
||||
{
|
||||
Node node;
|
||||
node.type = ActionType::INPUT;
|
||||
@ -77,10 +87,10 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
|
||||
node.result_name = std::move(column.name);
|
||||
node.column = std::move(column.column);
|
||||
|
||||
return addNode(std::move(node));
|
||||
return addNode(std::move(node), can_replace);
|
||||
}
|
||||
|
||||
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
|
||||
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace)
|
||||
{
|
||||
if (!column.column)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);
|
||||
@ -91,13 +101,16 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
|
||||
node.result_name = std::move(column.name);
|
||||
node.column = std::move(column.column);
|
||||
|
||||
return addNode(std::move(node));
|
||||
return addNode(std::move(node), can_replace);
|
||||
}
|
||||
|
||||
const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)
|
||||
{
|
||||
auto & child = getNode(name);
|
||||
return addAlias(getNode(name), alias, can_replace);
|
||||
}
|
||||
|
||||
ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool can_replace)
|
||||
{
|
||||
Node node;
|
||||
node.type = ActionType::ALIAS;
|
||||
node.result_type = child.result_type;
|
||||
@ -144,20 +157,33 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
|
||||
compilation_cache = context.getCompiledExpressionCache();
|
||||
#endif
|
||||
|
||||
size_t num_arguments = argument_names.size();
|
||||
Inputs children;
|
||||
children.reserve(argument_names.size());
|
||||
for (const auto & name : argument_names)
|
||||
children.push_back(&getNode(name));
|
||||
|
||||
return addFunction(function, children, std::move(result_name), false);
|
||||
}
|
||||
|
||||
ActionsDAG::Node & ActionsDAG::addFunction(
|
||||
const FunctionOverloadResolverPtr & function,
|
||||
Inputs children,
|
||||
std::string result_name,
|
||||
bool can_replace)
|
||||
{
|
||||
size_t num_arguments = children.size();
|
||||
|
||||
Node node;
|
||||
node.type = ActionType::FUNCTION;
|
||||
node.function_builder = function;
|
||||
node.children.reserve(num_arguments);
|
||||
node.children = std::move(children);
|
||||
|
||||
bool all_const = true;
|
||||
ColumnsWithTypeAndName arguments(num_arguments);
|
||||
|
||||
for (size_t i = 0; i < num_arguments; ++i)
|
||||
{
|
||||
auto & child = getNode(argument_names[i]);
|
||||
node.children.emplace_back(&child);
|
||||
auto & child = *node.children[i];
|
||||
node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding;
|
||||
|
||||
ColumnWithTypeAndName argument;
|
||||
@ -213,27 +239,26 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
|
||||
if (result_name.empty())
|
||||
{
|
||||
result_name = function->getName() + "(";
|
||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
||||
for (size_t i = 0; i < num_arguments; ++i)
|
||||
{
|
||||
if (i)
|
||||
result_name += ", ";
|
||||
result_name += argument_names[i];
|
||||
result_name += node.children[i]->result_name;
|
||||
}
|
||||
result_name += ")";
|
||||
}
|
||||
|
||||
node.result_name = std::move(result_name);
|
||||
|
||||
return addNode(std::move(node));
|
||||
return addNode(std::move(node), can_replace);
|
||||
}
|
||||
|
||||
|
||||
NamesAndTypesList ActionsDAG::getRequiredColumns() const
|
||||
{
|
||||
NamesAndTypesList result;
|
||||
for (const auto & node : nodes)
|
||||
if (node.type == ActionType::INPUT)
|
||||
result.emplace_back(node.result_name, node.result_type);
|
||||
for (const auto & input : inputs)
|
||||
result.emplace_back(input->result_name, input->result_type);
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -347,6 +372,8 @@ void ActionsDAG::removeUnusedActions()
|
||||
}
|
||||
|
||||
nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; });
|
||||
auto it = std::remove_if(inputs.begin(), inputs.end(), [&](const Node * node) { return visited_nodes.count(node) == 0; });
|
||||
inputs.erase(it, inputs.end());
|
||||
}
|
||||
|
||||
void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes)
|
||||
@ -442,6 +469,9 @@ ActionsDAGPtr ActionsDAG::clone() const
|
||||
for (const auto & node : index)
|
||||
actions->index.insert(copy_map[node]);
|
||||
|
||||
for (const auto & node : inputs)
|
||||
actions->inputs.push_back(copy_map[node]);
|
||||
|
||||
return actions;
|
||||
}
|
||||
|
||||
@ -531,6 +561,115 @@ bool ActionsDAG::empty() const
|
||||
return true;
|
||||
}
|
||||
|
||||
ActionsDAGPtr ActionsDAG::makeConvertingActions(
|
||||
const ColumnsWithTypeAndName & source,
|
||||
const ColumnsWithTypeAndName & result,
|
||||
MatchColumnsMode mode,
|
||||
bool ignore_constant_values)
|
||||
{
|
||||
size_t num_input_columns = source.size();
|
||||
size_t num_result_columns = result.size();
|
||||
|
||||
if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns)
|
||||
throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
auto actions_dag = std::make_shared<ActionsDAG>(source);
|
||||
std::vector<Node *> projection(num_result_columns);
|
||||
|
||||
FunctionOverloadResolverPtr func_builder_materialize =
|
||||
std::make_shared<FunctionOverloadResolverAdaptor>(
|
||||
std::make_unique<DefaultOverloadResolver>(
|
||||
std::make_shared<FunctionMaterialize>()));
|
||||
|
||||
std::map<std::string_view, std::list<size_t>> inputs;
|
||||
if (mode == MatchColumnsMode::Name)
|
||||
{
|
||||
for (size_t pos = 0; pos < actions_dag->inputs.size(); ++pos)
|
||||
inputs[actions_dag->inputs[pos]->result_name].push_back(pos);
|
||||
}
|
||||
|
||||
for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num)
|
||||
{
|
||||
const auto & res_elem = result[result_col_num];
|
||||
Node * src_node = nullptr;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case MatchColumnsMode::Position:
|
||||
{
|
||||
src_node = actions_dag->inputs[result_col_num];
|
||||
break;
|
||||
}
|
||||
|
||||
case MatchColumnsMode::Name:
|
||||
{
|
||||
auto & input = inputs[res_elem.name];
|
||||
if (input.empty())
|
||||
throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream",
|
||||
ErrorCodes::THERE_IS_NO_COLUMN);
|
||||
|
||||
src_node = actions_dag->inputs[input.front()];
|
||||
input.pop_front();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check constants.
|
||||
if (const auto * res_const = typeid_cast<const ColumnConst *>(res_elem.column.get()))
|
||||
{
|
||||
if (const auto * src_const = typeid_cast<const ColumnConst *>(src_node->column.get()))
|
||||
{
|
||||
if (ignore_constant_values)
|
||||
src_node = const_cast<Node *>(&actions_dag->addColumn(res_elem, true));
|
||||
else if (res_const->getField() != src_const->getField())
|
||||
throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + " because "
|
||||
"it is constant but values of constants are different in source and result",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
else
|
||||
throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + " because "
|
||||
"it is non constant in source stream but must be constant in result",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
/// Add CAST function to convert into result type if needed.
|
||||
if (!res_elem.type->equals(*src_node->result_type))
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = res_elem.type->getName();
|
||||
column.column = DataTypeString().createColumnConst(0, column.name);
|
||||
column.type = std::make_shared<DataTypeString>();
|
||||
|
||||
auto * right_arg = const_cast<Node *>(&actions_dag->addColumn(std::move(column), true));
|
||||
auto * left_arg = src_node;
|
||||
|
||||
CastOverloadResolver::Diagnostic diagnostic = {src_node->result_name, res_elem.name};
|
||||
FunctionOverloadResolverPtr func_builder_cast =
|
||||
std::make_shared<FunctionOverloadResolverAdaptor>(
|
||||
CastOverloadResolver::createImpl(false, std::move(diagnostic)));
|
||||
|
||||
Inputs children = { left_arg, right_arg };
|
||||
src_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}, true);
|
||||
}
|
||||
|
||||
if (src_node->column && isColumnConst(*src_node->column) && !(res_elem.column && isColumnConst(*res_elem.column)))
|
||||
{
|
||||
Inputs children = {src_node};
|
||||
src_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}, true);
|
||||
}
|
||||
|
||||
if (src_node->result_name != res_elem.name)
|
||||
src_node = &actions_dag->addAlias(*src_node, res_elem.name, true);
|
||||
|
||||
projection[result_col_num] = src_node;
|
||||
}
|
||||
|
||||
actions_dag->removeUnusedActions(projection);
|
||||
actions_dag->projectInput();
|
||||
|
||||
return actions_dag;
|
||||
}
|
||||
|
||||
ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns)
|
||||
{
|
||||
/// Split DAG into two parts.
|
||||
@ -540,6 +679,7 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join
|
||||
std::list<Node> split_nodes;
|
||||
Index this_index;
|
||||
Index split_index;
|
||||
Inputs new_inputs;
|
||||
|
||||
struct Frame
|
||||
{
|
||||
@ -628,6 +768,9 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join
|
||||
input_node.result_name = child->result_name; // getUniqueNameForIndex(index, child->result_name);
|
||||
child_data.to_this = &this_nodes.emplace_back(std::move(input_node));
|
||||
|
||||
if (child->type != ActionType::INPUT)
|
||||
new_inputs.push_back(child_data.to_this);
|
||||
|
||||
/// This node is needed for current action, so put it to index also.
|
||||
split_index.replace(child_data.to_split);
|
||||
}
|
||||
@ -658,6 +801,9 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join
|
||||
input_node.result_type = node.result_type;
|
||||
input_node.result_name = node.result_name;
|
||||
cur_data.to_this = &this_nodes.emplace_back(std::move(input_node));
|
||||
|
||||
if (copy.type != ActionType::INPUT)
|
||||
new_inputs.push_back(cur_data.to_this);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -676,12 +822,28 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join
|
||||
if (split_actions_are_empty)
|
||||
return {};
|
||||
|
||||
Inputs this_inputs;
|
||||
Inputs split_inputs;
|
||||
|
||||
for (auto * input : inputs)
|
||||
{
|
||||
const auto & cur = data[input];
|
||||
if (cur.to_this)
|
||||
this_inputs.push_back(cur.to_this);
|
||||
if (cur.to_split)
|
||||
split_inputs.push_back(cur.to_split);
|
||||
}
|
||||
|
||||
this_inputs.insert(this_inputs.end(), new_inputs.begin(), new_inputs.end());
|
||||
|
||||
index.swap(this_index);
|
||||
nodes.swap(this_nodes);
|
||||
inputs.swap(this_inputs);
|
||||
|
||||
auto split_actions = cloneEmpty();
|
||||
split_actions->nodes.swap(split_nodes);
|
||||
split_actions->index.swap(split_index);
|
||||
split_actions->inputs.swap(split_inputs);
|
||||
split_actions->settings.project_input = false;
|
||||
|
||||
return split_actions;
|
||||
|
@ -151,6 +151,7 @@ public:
|
||||
};
|
||||
|
||||
using Nodes = std::list<Node>;
|
||||
using Inputs = std::vector<Node *>;
|
||||
|
||||
struct ActionsSettings
|
||||
{
|
||||
@ -165,6 +166,7 @@ public:
|
||||
private:
|
||||
Nodes nodes;
|
||||
Index index;
|
||||
Inputs inputs;
|
||||
|
||||
ActionsSettings settings;
|
||||
|
||||
@ -176,11 +178,12 @@ public:
|
||||
ActionsDAG() = default;
|
||||
ActionsDAG(const ActionsDAG &) = delete;
|
||||
ActionsDAG & operator=(const ActionsDAG &) = delete;
|
||||
explicit ActionsDAG(const NamesAndTypesList & inputs);
|
||||
explicit ActionsDAG(const ColumnsWithTypeAndName & inputs);
|
||||
explicit ActionsDAG(const NamesAndTypesList & inputs_);
|
||||
explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
|
||||
|
||||
const Nodes & getNodes() const { return nodes; }
|
||||
const Index & getIndex() const { return index; }
|
||||
const Inputs & getInputs() const { return inputs; }
|
||||
|
||||
NamesAndTypesList getRequiredColumns() const;
|
||||
ColumnsWithTypeAndName getResultColumns() const;
|
||||
@ -190,9 +193,9 @@ public:
|
||||
std::string dumpNames() const;
|
||||
std::string dumpDAG() const;
|
||||
|
||||
const Node & addInput(std::string name, DataTypePtr type);
|
||||
const Node & addInput(ColumnWithTypeAndName column);
|
||||
const Node & addColumn(ColumnWithTypeAndName column);
|
||||
const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false);
|
||||
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false);
|
||||
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false);
|
||||
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
|
||||
const Node & addArrayJoin(const std::string & source_name, std::string result_name);
|
||||
const Node & addFunction(
|
||||
@ -227,10 +230,35 @@ public:
|
||||
|
||||
ActionsDAGPtr clone() const;
|
||||
|
||||
|
||||
enum class MatchColumnsMode
|
||||
{
|
||||
/// Require same number of columns in source and result. Match columns by corresponding positions, regardless to names.
|
||||
Position,
|
||||
/// Find columns in source by their names. Allow excessive columns in source.
|
||||
Name,
|
||||
};
|
||||
|
||||
/// Create ActionsDAG which converts block structure from source to result.
|
||||
/// It is needed to convert result from different sources to the same structure, e.g. for UNION query.
|
||||
/// Conversion should be possible with only usage of CAST function and renames.
|
||||
static ActionsDAGPtr makeConvertingActions(
|
||||
const ColumnsWithTypeAndName & source,
|
||||
const ColumnsWithTypeAndName & result,
|
||||
MatchColumnsMode mode,
|
||||
bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header.
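To illustrate the `Name` matching mode declared above, here is a small self-contained sketch (simplified `Toy*` structs, not the real `ActionsDAG` API): each result column is looked up in the source by name, a CAST is recorded when the types differ, and a missing column is an error.

```cpp
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

struct ToyColumn { std::string name; std::string type; };   // simplified stand-ins
struct ToyAction { std::string description; };

// Name-based conversion plan: find each result column in the source by name
// and emit a CAST action when the types differ (renames and constants omitted).
std::vector<ToyAction> makeConvertingPlanByName(const std::vector<ToyColumn> & source,
                                                const std::vector<ToyColumn> & result)
{
    std::map<std::string, std::string> source_types;
    for (const auto & col : source)
        source_types[col.name] = col.type;

    std::vector<ToyAction> plan;
    for (const auto & res : result)
    {
        auto it = source_types.find(res.name);
        if (it == source_types.end())
            throw std::runtime_error("Cannot find column " + res.name + " in source stream");
        if (it->second != res.type)
            plan.push_back({"CAST(" + res.name + ", '" + res.type + "')"});
    }
    return plan;
}

int main()
{
    std::vector<ToyColumn> source{{"id", "UInt32"}, {"name", "String"}};
    std::vector<ToyColumn> result{{"id", "UInt64"}, {"name", "String"}};
    for (const auto & action : makeConvertingPlanByName(source, result))
        std::cout << action.description << '\n';   // prints CAST(id, 'UInt64')
    return 0;
}
```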
|
||||
|
||||
private:
|
||||
Node & addNode(Node node, bool can_replace = false);
|
||||
Node & getNode(const std::string & name);
|
||||
|
||||
Node & addAlias(Node & child, std::string alias, bool can_replace);
|
||||
Node & addFunction(
|
||||
const FunctionOverloadResolverPtr & function,
|
||||
Inputs children,
|
||||
std::string result_name,
|
||||
bool can_replace);
|
||||
|
||||
ActionsDAGPtr cloneEmpty() const
|
||||
{
|
||||
auto actions = std::make_shared<ActionsDAG>();
|
||||
|
@ -10,11 +10,10 @@
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <Processors/Sources/RemoteSource.h>
|
||||
#include <Processors/Sources/DelayedSource.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Processors/QueryPlan/ConvertingStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -87,7 +86,13 @@ std::unique_ptr<QueryPlan> createLocalPlan(
|
||||
/// Convert header structure to expected.
|
||||
/// Also we ignore constants from result and replace it with constants from header.
|
||||
/// It is needed for functions like `now64()` or `randConstant()` because their values may be different.
|
||||
auto converting = std::make_unique<ConvertingStep>(query_plan->getCurrentDataStream(), header, true);
|
||||
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
|
||||
query_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name,
|
||||
true);
|
||||
|
||||
auto converting = std::make_unique<ExpressionStep>(query_plan->getCurrentDataStream(), convert_actions_dag);
|
||||
converting->setStepDescription("Convert block structure for query from local replica");
|
||||
query_plan->addStep(std::move(converting));
|
||||
|
||||
|
@ -336,9 +336,9 @@ struct ContextShared
|
||||
ReplicatedFetchList replicated_fetch_list;
|
||||
ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections.
|
||||
InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
|
||||
std::optional<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
|
||||
std::optional<BackgroundSchedulePool> schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
|
||||
std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
|
||||
mutable std::optional<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
|
||||
mutable std::optional<BackgroundSchedulePool> schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
|
||||
mutable std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
|
||||
MultiVersion<Macros> macros; /// Substitutions extracted from config.
|
||||
std::unique_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
|
||||
/// Rules for selecting the compression settings, depending on the size of the part.
|
||||
@ -484,7 +484,7 @@ Context Context::createGlobal(ContextShared * shared)
|
||||
|
||||
void Context::initGlobal()
|
||||
{
|
||||
DatabaseCatalog::init(this);
|
||||
DatabaseCatalog::init(*this);
|
||||
TemporaryLiveViewCleaner::init(*this);
|
||||
}
|
||||
|
||||
@ -1401,7 +1401,7 @@ void Context::dropCaches() const
|
||||
shared->mark_cache->reset();
|
||||
}
|
||||
|
||||
BackgroundSchedulePool & Context::getBufferFlushSchedulePool()
|
||||
BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->buffer_flush_schedule_pool)
|
||||
@ -1443,7 +1443,7 @@ BackgroundTaskSchedulingSettings Context::getBackgroundMoveTaskSchedulingSetting
|
||||
return task_settings;
|
||||
}
|
||||
|
||||
BackgroundSchedulePool & Context::getSchedulePool()
|
||||
BackgroundSchedulePool & Context::getSchedulePool() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->schedule_pool)
|
||||
@ -1454,7 +1454,7 @@ BackgroundSchedulePool & Context::getSchedulePool()
|
||||
return *shared->schedule_pool;
|
||||
}
|
||||
|
||||
BackgroundSchedulePool & Context::getDistributedSchedulePool()
|
||||
BackgroundSchedulePool & Context::getDistributedSchedulePool() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->distributed_schedule_pool)
|
||||
|
@ -519,9 +519,9 @@ public:
|
||||
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
|
||||
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;
|
||||
|
||||
BackgroundSchedulePool & getBufferFlushSchedulePool();
|
||||
BackgroundSchedulePool & getSchedulePool();
|
||||
BackgroundSchedulePool & getDistributedSchedulePool();
|
||||
BackgroundSchedulePool & getBufferFlushSchedulePool() const;
|
||||
BackgroundSchedulePool & getSchedulePool() const;
|
||||
BackgroundSchedulePool & getDistributedSchedulePool() const;
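A minimal sketch of the pattern behind these `const` getters, assuming a hypothetical `ToyPool`/`ToyContext`: the cached members are declared `mutable` so a logically-const accessor can still construct them lazily under a lock.

```cpp
#include <iostream>
#include <mutex>
#include <optional>
#include <string>

struct ToyPool            // stand-in for BackgroundSchedulePool
{
    explicit ToyPool(std::string name_) : name(std::move(name_)) {}
    std::string name;
};

class ToyContext
{
public:
    // Const getter that lazily creates the pool on first use.
    ToyPool & getSchedulePool() const
    {
        std::lock_guard lock(mutex);
        if (!schedule_pool)
            schedule_pool.emplace("schedule");
        return *schedule_pool;
    }

private:
    // `mutable` lets logically-const accessors initialize the cached members.
    mutable std::mutex mutex;
    mutable std::optional<ToyPool> schedule_pool;
};

int main()
{
    const ToyContext context;
    std::cout << context.getSchedulePool().name << '\n';   // prints "schedule"
    return 0;
}
```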
|
||||
|
||||
/// Has distributed_ddl configuration or not.
|
||||
bool hasDistributedDDL() const;
|
||||
|
@ -39,7 +39,7 @@ namespace ErrorCodes
|
||||
|
||||
TemporaryTableHolder::TemporaryTableHolder(const Context & context_,
|
||||
const TemporaryTableHolder::Creator & creator, const ASTPtr & query)
|
||||
: global_context(&context_.getGlobalContext())
|
||||
: global_context(context_.getGlobalContext())
|
||||
, temporary_tables(DatabaseCatalog::instance().getDatabaseForTemporaryTables().get())
|
||||
{
|
||||
ASTPtr original_create;
|
||||
@ -62,7 +62,7 @@ TemporaryTableHolder::TemporaryTableHolder(const Context & context_,
|
||||
}
|
||||
auto table_id = StorageID(DatabaseCatalog::TEMPORARY_DATABASE, global_name, id);
|
||||
auto table = creator(table_id);
|
||||
temporary_tables->createTable(*global_context, global_name, table, original_create);
|
||||
temporary_tables->createTable(global_context, global_name, table, original_create);
|
||||
table->startup();
|
||||
}
|
||||
|
||||
@ -107,7 +107,7 @@ TemporaryTableHolder & TemporaryTableHolder::operator = (TemporaryTableHolder &&
|
||||
TemporaryTableHolder::~TemporaryTableHolder()
|
||||
{
|
||||
if (id != UUIDHelpers::Nil)
|
||||
temporary_tables->dropTable(*global_context, "_tmp_" + toString(id));
|
||||
temporary_tables->dropTable(global_context, "_tmp_" + toString(id));
|
||||
}
|
||||
|
||||
StorageID TemporaryTableHolder::getGlobalTableID() const
|
||||
@ -117,7 +117,7 @@ StorageID TemporaryTableHolder::getGlobalTableID() const
|
||||
|
||||
StoragePtr TemporaryTableHolder::getTable() const
|
||||
{
|
||||
auto table = temporary_tables->tryGetTable("_tmp_" + toString(id), *global_context);
|
||||
auto table = temporary_tables->tryGetTable("_tmp_" + toString(id), global_context);
|
||||
if (!table)
|
||||
throw Exception("Temporary table " + getGlobalTableID().getNameForLogs() + " not found", ErrorCodes::LOGICAL_ERROR);
|
||||
return table;
|
||||
@ -126,13 +126,13 @@ StoragePtr TemporaryTableHolder::getTable() const
|
||||
|
||||
void DatabaseCatalog::loadDatabases()
|
||||
{
|
||||
drop_delay_sec = global_context->getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec);
|
||||
drop_delay_sec = global_context.getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec);
|
||||
|
||||
auto db_for_temporary_and_external_tables = std::make_shared<DatabaseMemory>(TEMPORARY_DATABASE, *global_context);
|
||||
auto db_for_temporary_and_external_tables = std::make_shared<DatabaseMemory>(TEMPORARY_DATABASE, global_context);
|
||||
attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables);
|
||||
|
||||
loadMarkedAsDroppedTables();
|
||||
auto task_holder = global_context->getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); });
|
||||
auto task_holder = global_context.getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); });
|
||||
drop_task = std::make_unique<BackgroundSchedulePoolTaskHolder>(std::move(task_holder));
|
||||
(*drop_task)->activate();
|
||||
std::lock_guard lock{tables_marked_dropped_mutex};
|
||||
@ -328,11 +328,11 @@ DatabasePtr DatabaseCatalog::detachDatabase(const String & database_name, bool d
|
||||
if (drop)
|
||||
{
|
||||
/// Delete the database.
|
||||
db->drop(*global_context);
|
||||
db->drop(global_context);
|
||||
|
||||
/// Old ClickHouse versions did not store database.sql files
|
||||
Poco::File database_metadata_file(
|
||||
global_context->getPath() + "metadata/" + escapeForFileName(database_name) + ".sql");
|
||||
global_context.getPath() + "metadata/" + escapeForFileName(database_name) + ".sql");
|
||||
if (database_metadata_file.exists())
|
||||
database_metadata_file.remove(false);
|
||||
}
|
||||
@ -505,14 +505,12 @@ void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database,
|
||||
|
||||
std::unique_ptr<DatabaseCatalog> DatabaseCatalog::database_catalog;
|
||||
|
||||
DatabaseCatalog::DatabaseCatalog(Context * global_context_)
|
||||
DatabaseCatalog::DatabaseCatalog(Context & global_context_)
|
||||
: global_context(global_context_), log(&Poco::Logger::get("DatabaseCatalog"))
|
||||
{
|
||||
if (!global_context)
|
||||
throw Exception("DatabaseCatalog is not initialized. It's a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
DatabaseCatalog & DatabaseCatalog::init(Context * global_context_)
|
||||
DatabaseCatalog & DatabaseCatalog::init(Context & global_context_)
|
||||
{
|
||||
if (database_catalog)
|
||||
{
|
||||
@ -651,7 +649,7 @@ void DatabaseCatalog::loadMarkedAsDroppedTables()
|
||||
/// we should load them and enqueue cleanup to remove data from store/ and metadata from ZooKeeper
|
||||
|
||||
std::map<String, StorageID> dropped_metadata;
|
||||
String path = global_context->getPath() + "metadata_dropped/";
|
||||
String path = global_context.getPath() + "metadata_dropped/";
|
||||
|
||||
if (!std::filesystem::exists(path))
|
||||
{
|
||||
@ -706,7 +704,7 @@ void DatabaseCatalog::loadMarkedAsDroppedTables()
|
||||
|
||||
String DatabaseCatalog::getPathForDroppedMetadata(const StorageID & table_id) const
|
||||
{
|
||||
return global_context->getPath() + "metadata_dropped/" +
|
||||
return global_context.getPath() + "metadata_dropped/" +
|
||||
escapeForFileName(table_id.getDatabaseName()) + "." +
|
||||
escapeForFileName(table_id.getTableName()) + "." +
|
||||
toString(table_id.uuid) + ".sql";
|
||||
@ -729,7 +727,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr
|
||||
{
|
||||
/// Try load table from metadata to drop it correctly (e.g. remove metadata from zk or remove data from all volumes)
|
||||
LOG_INFO(log, "Trying load partially dropped table {} from {}", table_id.getNameForLogs(), dropped_metadata_path);
|
||||
ASTPtr ast = DatabaseOnDisk::parseQueryFromMetadata(log, *global_context, dropped_metadata_path, /*throw_on_error*/ false, /*remove_empty*/false);
|
||||
ASTPtr ast = DatabaseOnDisk::parseQueryFromMetadata(log, global_context, dropped_metadata_path, /*throw_on_error*/ false, /*remove_empty*/false);
|
||||
auto * create = typeid_cast<ASTCreateQuery *>(ast.get());
|
||||
assert(!create || create->uuid == table_id.uuid);
|
||||
|
||||
@ -740,7 +738,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr
|
||||
create->table = table_id.table_name;
|
||||
try
|
||||
{
|
||||
table = createTableFromAST(*create, table_id.getDatabaseName(), data_path, *global_context, false).second;
|
||||
table = createTableFromAST(*create, table_id.getDatabaseName(), data_path, global_context, false).second;
|
||||
table->is_dropped = true;
|
||||
}
|
||||
catch (...)
|
||||
@ -867,7 +865,7 @@ void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table)
|
||||
|
||||
/// Even if table is not loaded, try remove its data from disk.
|
||||
/// TODO remove data from all volumes
|
||||
String data_path = global_context->getPath() + "store/" + getPathForUUID(table.table_id.uuid);
|
||||
String data_path = global_context.getPath() + "store/" + getPathForUUID(table.table_id.uuid);
|
||||
Poco::File table_data_dir{data_path};
|
||||
if (table_data_dir.exists())
|
||||
{
|
||||
@ -901,7 +899,7 @@ String DatabaseCatalog::resolveDictionaryName(const String & name) const
|
||||
String maybe_database_name = name.substr(0, pos);
|
||||
String maybe_table_name = name.substr(pos + 1);
|
||||
|
||||
auto db_and_table = tryGetDatabaseAndTable({maybe_database_name, maybe_table_name}, *global_context);
|
||||
auto db_and_table = tryGetDatabaseAndTable({maybe_database_name, maybe_table_name}, global_context);
|
||||
if (!db_and_table.first)
|
||||
return name;
|
||||
assert(db_and_table.second);
|
||||
|
@ -73,7 +73,6 @@ struct TemporaryTableHolder : boost::noncopyable
|
||||
{
|
||||
typedef std::function<StoragePtr(const StorageID &)> Creator;
|
||||
|
||||
TemporaryTableHolder() = default;
|
||||
TemporaryTableHolder(const Context & context, const Creator & creator, const ASTPtr & query = {});
|
||||
|
||||
/// Creates temporary table with Engine=Memory
|
||||
@ -95,7 +94,7 @@ struct TemporaryTableHolder : boost::noncopyable
|
||||
|
||||
operator bool () const { return id != UUIDHelpers::Nil; }
|
||||
|
||||
const Context * global_context = nullptr;
|
||||
const Context & global_context;
|
||||
IDatabase * temporary_tables = nullptr;
|
||||
UUID id = UUIDHelpers::Nil;
|
||||
};
|
||||
@ -111,7 +110,7 @@ public:
|
||||
static constexpr const char * TEMPORARY_DATABASE = "_temporary_and_external_tables";
|
||||
static constexpr const char * SYSTEM_DATABASE = "system";
|
||||
|
||||
static DatabaseCatalog & init(Context * global_context_);
|
||||
static DatabaseCatalog & init(Context & global_context_);
|
||||
static DatabaseCatalog & instance();
|
||||
static void shutdown();
|
||||
|
||||
@ -199,7 +198,7 @@ private:
|
||||
// make emplace(global_context_) compile with private constructor ¯\_(ツ)_/¯.
|
||||
static std::unique_ptr<DatabaseCatalog> database_catalog;
|
||||
|
||||
DatabaseCatalog(Context * global_context_);
|
||||
DatabaseCatalog(Context & global_context_);
|
||||
void assertDatabaseExistsUnlocked(const String & database_name) const;
|
||||
void assertDatabaseDoesntExistUnlocked(const String & database_name) const;
|
||||
|
||||
@ -240,7 +239,7 @@ private:
|
||||
using UUIDToDatabaseMap = std::unordered_map<UUID, DatabasePtr>;
|
||||
|
||||
/// For some reason Context is required to get Storage from Database object
|
||||
Context * global_context;
|
||||
Context & global_context;
|
||||
mutable std::mutex databases_mutex;
|
||||
|
||||
ViewDependencies view_dependencies;
|
||||
|
@ -83,6 +83,7 @@ void ExpressionActions::linearizeActions()
|
||||
|
||||
const auto & nodes = getNodes();
|
||||
const auto & index = actions_dag->getIndex();
|
||||
const auto & inputs = actions_dag->getInputs();
|
||||
|
||||
std::vector<Data> data(nodes.size());
|
||||
std::unordered_map<const Node *, size_t> reverse_index;
|
||||
@ -163,11 +164,10 @@ void ExpressionActions::linearizeActions()
|
||||
{
|
||||
/// Argument for input is special. It contains the position from required columns.
|
||||
ExpressionActions::Argument argument;
|
||||
argument.pos = required_columns.size();
|
||||
argument.needed_later = !cur.parents.empty();
|
||||
arguments.emplace_back(argument);
|
||||
|
||||
required_columns.push_back({node->result_name, node->result_type});
|
||||
//required_columns.push_back({node->result_name, node->result_type});
|
||||
}
|
||||
|
||||
actions.push_back({node, arguments, free_position});
|
||||
@ -199,6 +199,15 @@ void ExpressionActions::linearizeActions()
|
||||
ColumnWithTypeAndName col{node->column, node->result_type, node->result_name};
|
||||
sample_block.insert(std::move(col));
|
||||
}
|
||||
|
||||
for (const auto * input : inputs)
|
||||
{
|
||||
const auto & cur = data[reverse_index[input]];
|
||||
auto pos = required_columns.size();
|
||||
actions[cur.position].arguments.front().pos = pos;
|
||||
required_columns.push_back({input->result_name, input->result_type});
|
||||
input_positions[input->result_name].emplace_back(pos);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -412,14 +421,23 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run)
|
||||
.num_rows = num_rows,
|
||||
};
|
||||
|
||||
execution_context.inputs_pos.reserve(required_columns.size());
|
||||
execution_context.inputs_pos.assign(required_columns.size(), -1);
|
||||
|
||||
for (const auto & column : required_columns)
|
||||
for (size_t pos = 0; pos < block.columns(); ++pos)
|
||||
{
|
||||
ssize_t pos = -1;
|
||||
if (block.has(column.name))
|
||||
pos = block.getPositionByName(column.name);
|
||||
execution_context.inputs_pos.push_back(pos);
|
||||
const auto & col = block.getByPosition(pos);
|
||||
auto it = input_positions.find(col.name);
|
||||
if (it != input_positions.end())
|
||||
{
|
||||
for (auto input_pos : it->second)
|
||||
{
|
||||
if (execution_context.inputs_pos[input_pos] < 0)
|
||||
{
|
||||
execution_context.inputs_pos[input_pos] = pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
execution_context.columns.resize(num_columns);
|
||||
|
@ -44,10 +44,10 @@ public:
|
||||
struct Argument
|
||||
{
|
||||
/// Position in ExecutionContext::columns
|
||||
size_t pos;
|
||||
size_t pos = 0;
|
||||
/// True if there is another action which will use this column.
|
||||
/// Otherwise column will be removed.
|
||||
bool needed_later;
|
||||
bool needed_later = false;
|
||||
};
|
||||
|
||||
using Arguments = std::vector<Argument>;
|
||||
@ -63,6 +63,11 @@ public:
|
||||
|
||||
using Actions = std::vector<Action>;
|
||||
|
||||
/// This map helps to find input position bu it's name.
|
||||
/// Key is a view to input::result_name.
|
||||
/// Result is a list because it is allowed for inputs to have same names.
|
||||
using NameToInputMap = std::unordered_map<std::string_view, std::list<size_t>>;
|
||||
|
||||
private:
|
||||
|
||||
ActionsDAGPtr actions_dag;
|
||||
@ -70,6 +75,7 @@ private:
|
||||
size_t num_columns = 0;
|
||||
|
||||
NamesAndTypesList required_columns;
|
||||
NameToInputMap input_positions;
|
||||
ColumnNumbers result_positions;
|
||||
Block sample_block;
|
||||
|
||||
|
@ -42,8 +42,8 @@ namespace
|
||||
}
|
||||
|
||||
|
||||
ExternalLoaderDatabaseConfigRepository::ExternalLoaderDatabaseConfigRepository(IDatabase & database_, const Context & global_context_)
|
||||
: global_context(global_context_.getGlobalContext())
|
||||
ExternalLoaderDatabaseConfigRepository::ExternalLoaderDatabaseConfigRepository(IDatabase & database_, const Context & context_)
|
||||
: global_context(context_.getGlobalContext())
|
||||
, database_name(database_.getDatabaseName())
|
||||
, database(database_)
|
||||
{
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include <Processors/NullSink.h>
|
||||
#include <Processors/Sources/SinkToOutputStream.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Storages/StorageDistributed.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
@ -378,11 +378,15 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
else if (query.select || query.watch)
|
||||
{
|
||||
const auto & header = out_streams.at(0)->getHeader();
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
res.pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto actions = std::make_shared<ExpressionActions>(actions_dag);
|
||||
|
||||
res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<ConvertingTransform>(in_header, header,
|
||||
ConvertingTransform::MatchColumnsMode::Position);
|
||||
return std::make_shared<ExpressionTransform>(in_header, actions);
|
||||
});
|
||||
|
||||
res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr
|
||||
|
@ -78,7 +78,6 @@
|
||||
#include <ext/map.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <memory>
|
||||
#include <Processors/QueryPlan/ConvertingStep.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -479,7 +478,13 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan)
|
||||
/// We must guarantee that result structure is the same as in getSampleBlock()
|
||||
if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
|
||||
{
|
||||
auto converting = std::make_unique<ConvertingStep>(query_plan.getCurrentDataStream(), result_header, true);
|
||||
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
|
||||
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
result_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name,
|
||||
true);
|
||||
|
||||
auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), convert_actions_dag);
|
||||
query_plan.addStep(std::move(converting));
|
||||
}
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const
|
||||
for (const auto & entity : entities)
|
||||
{
|
||||
create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control));
|
||||
if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::USER))
|
||||
if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::ROLE))
|
||||
boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control));
|
||||
}
|
||||
|
||||
|
@ -225,7 +225,7 @@ void JSONCompactEachRowRowInputFormat::readField(size_t index, MutableColumns &
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(while read the value of key " + getPort().getHeader().getByPosition(index).name + ")");
|
||||
e.addMessage("(while reading the value of key " + getPort().getHeader().getByPosition(index).name + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(while read the value of key " + columnName(index) + ")");
|
||||
e.addMessage("(while reading the value of key " + columnName(index) + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -102,7 +102,7 @@ bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns)
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(while read the value of column " + getPort().getHeader().getByPosition(index).name + ")");
|
||||
e.addMessage("(while reading the value of column " + getPort().getHeader().getByPosition(index).name + ")");
|
||||
throw;
|
||||
}
|
||||
return read;
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <Processors/Transforms/TotalsHavingTransform.h>
|
||||
#include <Processors/Transforms/ExtremesTransform.h>
|
||||
#include <Processors/Transforms/CreatingSetsTransform.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
|
||||
#include <Processors/Formats/IOutputFormat.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
@ -14,6 +14,7 @@
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Processors/DelayedPortsProcessor.h>
|
||||
@ -226,10 +227,15 @@ QueryPipeline QueryPipeline::unitePipelines(
|
||||
|
||||
if (!pipeline.isCompleted())
|
||||
{
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
common_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto actions = std::make_shared<ExpressionActions>(actions_dag);
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<ConvertingTransform>(
|
||||
header, common_header, ConvertingTransform::MatchColumnsMode::Position);
|
||||
return std::make_shared<ExpressionTransform>(header, actions);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include <Processors/QueryPlan/ArrayJoinStep.h>
|
||||
#include <Processors/Transforms/ArrayJoinTransform.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
namespace DB
|
||||
@ -55,9 +56,15 @@ void ArrayJoinStep::transformPipeline(QueryPipeline & pipeline)
|
||||
|
||||
if (res_header && !blocksHaveEqualStructure(res_header, output_stream->header))
|
||||
{
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
res_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name);
|
||||
auto actions = std::make_shared<ExpressionActions>(actions_dag);
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<ConvertingTransform>(header, res_header, ConvertingTransform::MatchColumnsMode::Name);
|
||||
return std::make_shared<ExpressionTransform>(header, actions);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1,77 +0,0 @@
|
||||
#include <Processors/QueryPlan/ConvertingStep.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static ITransformingStep::Traits getTraits()
|
||||
{
|
||||
return ITransformingStep::Traits
|
||||
{
|
||||
{
|
||||
.preserves_distinct_columns = true,
|
||||
.returns_single_stream = false,
|
||||
.preserves_number_of_streams = true,
|
||||
.preserves_sorting = true,
|
||||
},
|
||||
{
|
||||
.preserves_number_of_rows = true,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
ConvertingStep::ConvertingStep(const DataStream & input_stream_, Block result_header_, bool ignore_constant_values_)
|
||||
: ITransformingStep(input_stream_, result_header_, getTraits())
|
||||
, result_header(std::move(result_header_))
|
||||
, ignore_constant_values(ignore_constant_values_)
|
||||
{
|
||||
updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
|
||||
}
|
||||
|
||||
void ConvertingStep::transformPipeline(QueryPipeline & pipeline)
|
||||
{
|
||||
pipeline.addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<ConvertingTransform>(header, result_header, ConvertingTransform::MatchColumnsMode::Name, ignore_constant_values);
|
||||
});
|
||||
}
|
||||
|
||||
void ConvertingStep::describeActions(FormatSettings & settings) const
|
||||
{
|
||||
const auto & header = input_streams[0].header;
|
||||
auto conversion = ConvertingTransform(header, result_header, ConvertingTransform::MatchColumnsMode::Name, ignore_constant_values)
|
||||
.getConversion();
|
||||
|
||||
auto dump_description = [&](const ColumnWithTypeAndName & elem, bool is_const)
|
||||
{
|
||||
settings.out << elem.name << ' ' << elem.type->getName() << (is_const ? " Const" : "") << '\n';
|
||||
};
|
||||
|
||||
String prefix(settings.offset, ' ');
|
||||
|
||||
for (size_t i = 0; i < conversion.size(); ++i)
|
||||
{
|
||||
const auto & from = header.getByPosition(conversion[i]);
|
||||
const auto & to = result_header.getByPosition(i);
|
||||
|
||||
bool from_const = from.column && isColumnConst(*from.column);
|
||||
bool to_const = to.column && isColumnConst(*to.column);
|
||||
|
||||
settings.out << prefix;
|
||||
|
||||
if (from.name == to.name && from.type->equals(*to.type) && from_const == to_const)
|
||||
dump_description(from, from_const);
|
||||
else
|
||||
{
|
||||
dump_description(to, to_const);
|
||||
settings.out << " ← ";
|
||||
dump_description(from, from_const);
|
||||
}
|
||||
|
||||
settings.out << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
#pragma once
|
||||
#include <Processors/QueryPlan/ITransformingStep.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Convert one block structure to another. See ConvertingTransform.
|
||||
class ConvertingStep : public ITransformingStep
|
||||
{
|
||||
public:
|
||||
ConvertingStep(const DataStream & input_stream_, Block result_header_, bool ignore_constant_values_ = false);
|
||||
|
||||
String getName() const override { return "Converting"; }
|
||||
|
||||
void transformPipeline(QueryPipeline & pipeline) override;
|
||||
|
||||
void describeActions(FormatSettings & settings) const override;
|
||||
|
||||
private:
|
||||
Block result_header;
|
||||
/// Do not check that constants are same. Use value from result_header.
|
||||
bool ignore_constant_values;
|
||||
};
|
||||
|
||||
}
|
@ -1,7 +1,6 @@
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <Processors/Transforms/JoiningTransform.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -75,10 +74,15 @@ void ExpressionStep::transformPipeline(QueryPipeline & pipeline)
|
||||
|
||||
if (!blocksHaveEqualStructure(pipeline.getHeader(), output_stream->header))
|
||||
{
|
||||
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
|
||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
output_stream->header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name);
|
||||
auto convert_actions = std::make_shared<ExpressionActions>(convert_actions_dag);
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<ConvertingTransform>(header, output_stream->header,
|
||||
ConvertingTransform::MatchColumnsMode::Name);
|
||||
return std::make_shared<ExpressionTransform>(header, convert_actions);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/Transforms/FilterTransform.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
@ -67,9 +67,15 @@ void FilterStep::transformPipeline(QueryPipeline & pipeline)
|
||||
|
||||
if (!blocksHaveEqualStructure(pipeline.getHeader(), output_stream->header))
|
||||
{
|
||||
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
|
||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
output_stream->header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name);
|
||||
auto convert_actions = std::make_shared<ExpressionActions>(convert_actions_dag);
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & header)
|
||||
{
|
||||
return std::make_shared<ConvertingTransform>(header, output_stream->header, ConvertingTransform::MatchColumnsMode::Name);
|
||||
return std::make_shared<ExpressionTransform>(header, convert_actions);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1,137 +0,0 @@
|
||||
#include <Processors/Transforms/ConvertingTransform.h>
|
||||
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/quoteString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
static ColumnPtr castColumnWithDiagnostic(
|
||||
const ColumnWithTypeAndName & src_elem,
|
||||
const ColumnWithTypeAndName & res_elem)
|
||||
{
|
||||
try
|
||||
{
|
||||
return castColumn(src_elem, res_elem.type);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("while converting source column " + backQuoteIfNeed(src_elem.name) +
|
||||
" to destination column " + backQuoteIfNeed(res_elem.name));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
ConvertingTransform::ConvertingTransform(
|
||||
Block source_header_,
|
||||
Block result_header_,
|
||||
MatchColumnsMode mode_,
|
||||
bool ignore_constant_values_)
|
||||
: ISimpleTransform(std::move(source_header_), std::move(result_header_), false)
|
||||
, conversion(getOutputPort().getHeader().columns())
|
||||
, ignore_constant_values(ignore_constant_values_)
|
||||
{
|
||||
const auto & source = getInputPort().getHeader();
|
||||
const auto & result = getOutputPort().getHeader();
|
||||
|
||||
size_t num_input_columns = source.columns();
|
||||
size_t num_result_columns = result.columns();
|
||||
|
||||
if (mode_ == MatchColumnsMode::Position && num_input_columns != num_result_columns)
|
||||
throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num)
|
||||
{
|
||||
const auto & res_elem = result.getByPosition(result_col_num);
|
||||
|
||||
switch (mode_)
|
||||
{
|
||||
case MatchColumnsMode::Position:
|
||||
conversion[result_col_num] = result_col_num;
|
||||
break;
|
||||
|
||||
case MatchColumnsMode::Name:
|
||||
/// It may seem strange, but sometimes block may have columns with the same name.
|
||||
/// For this specific case, try to get column from the same position if it has correct name first.
|
||||
if (result_col_num < source.columns() && source.getByPosition(result_col_num).name == res_elem.name)
|
||||
conversion[result_col_num] = result_col_num;
|
||||
else if (source.has(res_elem.name))
|
||||
conversion[result_col_num] = source.getPositionByName(res_elem.name);
|
||||
else
|
||||
throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream",
|
||||
ErrorCodes::THERE_IS_NO_COLUMN);
|
||||
break;
|
||||
}
|
||||
|
||||
const auto & src_elem = source.getByPosition(conversion[result_col_num]);
|
||||
|
||||
/// Check constants.
|
||||
|
||||
if (const auto * res_const = typeid_cast<const ColumnConst *>(res_elem.column.get()))
|
||||
{
|
||||
if (const auto * src_const = typeid_cast<const ColumnConst *>(src_elem.column.get()))
|
||||
{
|
||||
if (!ignore_constant_values && res_const->getField() != src_const->getField())
|
||||
throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + " because "
|
||||
"it is constant but values of constants are different in source and result",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
else
|
||||
throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + " because "
|
||||
"it is non constant in source stream but must be constant in result",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
/// Check conversion by dry run CAST function.
|
||||
|
||||
castColumnWithDiagnostic(src_elem, res_elem);
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertingTransform::transform(Chunk & chunk)
|
||||
{
|
||||
const auto & source = getInputPort().getHeader();
|
||||
const auto & result = getOutputPort().getHeader();
|
||||
|
||||
auto num_rows = chunk.getNumRows();
|
||||
auto src_columns = chunk.detachColumns();
|
||||
|
||||
size_t num_res_columns = conversion.size();
|
||||
|
||||
Columns res_columns;
|
||||
res_columns.reserve(num_res_columns);
|
||||
|
||||
for (size_t res_pos = 0; res_pos < num_res_columns; ++res_pos)
|
||||
{
|
||||
auto src_elem = source.getByPosition(conversion[res_pos]);
|
||||
src_elem.column = src_columns[conversion[res_pos]];
|
||||
auto res_elem = result.getByPosition(res_pos);
|
||||
|
||||
if (ignore_constant_values && isColumnConst(*res_elem.column))
|
||||
{
|
||||
res_columns.emplace_back(res_elem.column->cloneResized(num_rows));
|
||||
continue;
|
||||
}
|
||||
|
||||
ColumnPtr converted = castColumnWithDiagnostic(src_elem, res_elem);
|
||||
|
||||
if (!isColumnConst(*res_elem.column))
|
||||
converted = converted->convertToFullColumnIfConst();
|
||||
|
||||
res_columns.emplace_back(std::move(converted));
|
||||
}
|
||||
|
||||
chunk.setColumns(std::move(res_columns), num_rows);
|
||||
}
|
||||
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Processors/ISimpleTransform.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Convert one block structure to another:
|
||||
*
|
||||
* Leaves only necessary columns;
|
||||
*
|
||||
* Columns are searched in source first by name;
|
||||
* and if there is no column with same name, then by position.
|
||||
*
|
||||
* Converting types of matching columns (with CAST function).
|
||||
*
|
||||
* Materializing columns which are const in source and non-const in result,
|
||||
* throw if they are const in result and non const in source,
|
||||
* or if they are const and have different values.
|
||||
*/
|
||||
class ConvertingTransform : public ISimpleTransform
|
||||
{
|
||||
public:
|
||||
enum class MatchColumnsMode
|
||||
{
|
||||
/// Require same number of columns in source and result. Match columns by corresponding positions, regardless to names.
|
||||
Position,
|
||||
/// Find columns in source by their names. Allow excessive columns in source.
|
||||
Name,
|
||||
};
|
||||
|
||||
ConvertingTransform(
|
||||
Block source_header_,
|
||||
Block result_header_,
|
||||
MatchColumnsMode mode_,
|
||||
bool ignore_constant_values_ = false); /// Do not check that constants are same. Use value from result_header.
|
||||
|
||||
String getName() const override { return "Converting"; }
|
||||
|
||||
const ColumnNumbers & getConversion() const { return conversion; }
|
||||
|
||||
protected:
|
||||
void transform(Chunk & chunk) override;
|
||||
|
||||
private:
|
||||
/// How to construct result block. Position in source block, where to get each column.
|
||||
ColumnNumbers conversion;
|
||||
/// Do not check that constants are same. Use value from result_header.
|
||||
/// This is needed in case run functions which are constant in query scope,
|
||||
/// but may return different result being executed remotely, like `now64()` or `randConstant()`.
|
||||
/// In this case we replace constants from remote source to constatns from initiator.
|
||||
bool ignore_constant_values;
|
||||
};
|
||||
|
||||
}
|
@ -95,7 +95,6 @@ SRCS(
|
||||
QueryPlan/AddingMissedStep.cpp
|
||||
QueryPlan/AggregatingStep.cpp
|
||||
QueryPlan/ArrayJoinStep.cpp
|
||||
QueryPlan/ConvertingStep.cpp
|
||||
QueryPlan/CreatingSetsStep.cpp
|
||||
QueryPlan/CubeStep.cpp
|
||||
QueryPlan/DistinctStep.cpp
|
||||
@ -135,7 +134,6 @@ SRCS(
|
||||
Transforms/AggregatingInOrderTransform.cpp
|
||||
Transforms/AggregatingTransform.cpp
|
||||
Transforms/ArrayJoinTransform.cpp
|
||||
Transforms/ConvertingTransform.cpp
|
||||
Transforms/CopyTransform.cpp
|
||||
Transforms/CreatingSetsTransform.cpp
|
||||
Transforms/CubeTransform.cpp
|
||||
|
@ -328,6 +328,16 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
|
||||
|
||||
Context query_context = connection_context;
|
||||
|
||||
std::atomic<size_t> affected_rows {0};
|
||||
auto prev = query_context.getProgressCallback();
|
||||
query_context.setProgressCallback([&, prev = prev](const Progress & progress)
|
||||
{
|
||||
if (prev)
|
||||
prev(progress);
|
||||
|
||||
affected_rows += progress.written_rows;
|
||||
});
|
||||
|
||||
executeQuery(should_replace ? replacement : payload, *out, true, query_context,
|
||||
[&with_output](const String &, const String &, const String &, const String &)
|
||||
{
|
||||
@ -336,7 +346,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
|
||||
);
|
||||
|
||||
if (!with_output)
|
||||
packet_endpoint->sendPacket(OKPacket(0x00, client_capability_flags, 0, 0, 0), true);
|
||||
packet_endpoint->sendPacket(OKPacket(0x00, client_capability_flags, affected_rows, 0, 0), true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,13 +83,13 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(
|
||||
: storage(storage_)
|
||||
, pool(std::move(pool_))
|
||||
, path{path_ + '/'}
|
||||
, should_batch_inserts(storage.global_context->getSettingsRef().distributed_directory_monitor_batch_inserts)
|
||||
, min_batched_block_size_rows(storage.global_context->getSettingsRef().min_insert_block_size_rows)
|
||||
, min_batched_block_size_bytes(storage.global_context->getSettingsRef().min_insert_block_size_bytes)
|
||||
, should_batch_inserts(storage.global_context.getSettingsRef().distributed_directory_monitor_batch_inserts)
|
||||
, min_batched_block_size_rows(storage.global_context.getSettingsRef().min_insert_block_size_rows)
|
||||
, min_batched_block_size_bytes(storage.global_context.getSettingsRef().min_insert_block_size_bytes)
|
||||
, current_batch_file_path{path + "current_batch.txt"}
|
||||
, default_sleep_time{storage.global_context->getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()}
|
||||
, default_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()}
|
||||
, sleep_time{default_sleep_time}
|
||||
, max_sleep_time{storage.global_context->getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()}
|
||||
, max_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()}
|
||||
, log{&Poco::Logger::get(getLoggerName())}
|
||||
, monitor_blocker(monitor_blocker_)
|
||||
, metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0)
|
||||
@ -249,7 +249,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
|
||||
|
||||
auto pools = createPoolsForAddresses(name, pool_factory);
|
||||
|
||||
const auto settings = storage.global_context->getSettings();
|
||||
const auto settings = storage.global_context.getSettings();
|
||||
return pools.size() == 1 ? pools.front() : std::make_shared<ConnectionPoolWithFailover>(pools,
|
||||
settings.load_balancing,
|
||||
settings.distributed_replica_error_half_life.totalSeconds(),
|
||||
@ -308,7 +308,7 @@ bool StorageDistributedDirectoryMonitor::processFiles(const std::map<UInt64, std
|
||||
void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path)
|
||||
{
|
||||
LOG_TRACE(log, "Started processing `{}`", file_path);
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context->getSettingsRef());
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context.getSettingsRef());
|
||||
|
||||
try
|
||||
{
|
||||
@ -483,7 +483,7 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
|
||||
Poco::File{tmp_file}.renameTo(parent.current_batch_file_path);
|
||||
}
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(parent.storage.global_context->getSettingsRef());
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(parent.storage.global_context.getSettingsRef());
|
||||
auto connection = parent.pool->get(timeouts);
|
||||
|
||||
bool batch_broken = false;
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <Storages/Kafka/StorageKafka.h>
|
||||
#include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "KafkaBlockOutputStream.h"
|
||||
#include <Storages/Kafka/KafkaBlockOutputStream.h>
|
||||
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Storages/Kafka/WriteBufferToKafkaProducer.h>
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Storages/Kafka/StorageKafka.h>
|
||||
|
||||
namespace DB
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include <librdkafka/rdkafka.h>
|
||||
#include <common/getFQDNOrHostName.h>
|
||||
@ -169,7 +170,7 @@ namespace
|
||||
|
||||
StorageKafka::StorageKafka(
|
||||
const StorageID & table_id_,
|
||||
Context & context_,
|
||||
const Context & context_,
|
||||
const ColumnsDescription & columns_,
|
||||
std::unique_ptr<KafkaSettings> kafka_settings_)
|
||||
: IStorage(table_id_)
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/Kafka/Buffer_fwd.h>
|
||||
#include <Storages/Kafka/KafkaSettings.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/SettingsChanges.h>
|
||||
|
||||
#include <Poco/Semaphore.h>
|
||||
@ -69,13 +68,13 @@ public:
|
||||
protected:
|
||||
StorageKafka(
|
||||
const StorageID & table_id_,
|
||||
Context & context_,
|
||||
const Context & context_,
|
||||
const ColumnsDescription & columns_,
|
||||
std::unique_ptr<KafkaSettings> kafka_settings_);
|
||||
|
||||
private:
|
||||
// Configuration and state
|
||||
Context & global_context;
|
||||
const Context & global_context;
|
||||
std::unique_ptr<KafkaSettings> kafka_settings;
|
||||
const Names topics;
|
||||
const String brokers;
|
||||
|
@ -45,6 +45,7 @@ constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE = 1;
|
||||
constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS = 2;
|
||||
constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE = 3;
|
||||
constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION = 4;
|
||||
constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID = 5;
|
||||
|
||||
|
||||
std::string getEndpointId(const std::string & node_id)
|
||||
@ -109,7 +110,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo
|
||||
}
|
||||
|
||||
/// We pretend to work as older server version, to be sure that client will correctly process our version
|
||||
response.addCookie({"server_protocol_version", toString(std::min(client_protocol_version, REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION))});
|
||||
response.addCookie({"server_protocol_version", toString(std::min(client_protocol_version, REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID))});
|
||||
|
||||
++total_sends;
|
||||
SCOPE_EXIT({--total_sends;});
|
||||
@ -138,12 +139,14 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo
|
||||
if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
|
||||
writeStringBinary(part->getType().toString(), out);
|
||||
|
||||
if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID)
|
||||
writeUUIDText(part->uuid, out);
|
||||
|
||||
if (isInMemoryPart(part))
|
||||
sendPartFromMemory(part, out);
|
||||
else
|
||||
{
|
||||
bool send_default_compression_file = client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION;
|
||||
sendPartFromDisk(part, out, send_default_compression_file);
|
||||
sendPartFromDisk(part, out, client_protocol_version);
|
||||
}
|
||||
}
|
||||
catch (const NetException &)
|
||||
@ -176,7 +179,7 @@ void Service::sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteB
|
||||
block_out.write(part_in_memory->block);
|
||||
}
|
||||
|
||||
void Service::sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_default_compression_file)
|
||||
void Service::sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version)
|
||||
{
|
||||
/// We'll take a list of files from the list of checksums.
|
||||
MergeTreeData::DataPart::Checksums checksums = part->checksums;
|
||||
@ -184,8 +187,9 @@ void Service::sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuf
|
||||
auto file_names_without_checksums = part->getFileNamesWithoutChecksums();
|
||||
for (const auto & file_name : file_names_without_checksums)
|
||||
{
|
||||
if (!send_default_compression_file && file_name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME)
|
||||
if (client_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION && file_name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME)
|
||||
continue;
|
||||
|
||||
checksums.files[file_name] = {};
|
||||
}
|
||||
|
||||
@ -263,7 +267,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart(
|
||||
{
|
||||
{"endpoint", getEndpointId(replica_path)},
|
||||
{"part", part_name},
|
||||
{"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION)},
|
||||
{"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID)},
|
||||
{"compress", "false"}
|
||||
});
|
||||
|
||||
@ -318,6 +322,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart(
|
||||
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
|
||||
readStringBinary(part_type, in);
|
||||
|
||||
UUID part_uuid = UUIDHelpers::Nil;
|
||||
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID)
|
||||
readUUIDText(part_uuid, in);
|
||||
|
||||
auto storage_id = data.getStorageID();
|
||||
String new_part_path = part_type == "InMemory" ? "memory" : data.getFullPathOnDisk(reservation->getDisk()) + part_name + "/";
|
||||
auto entry = data.global_context.getReplicatedFetchList().insert(
|
||||
@ -327,12 +335,14 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart(
|
||||
|
||||
in.setNextCallback(ReplicatedFetchReadCallback(*entry));
|
||||
|
||||
return part_type == "InMemory" ? downloadPartToMemory(part_name, metadata_snapshot, std::move(reservation), in)
|
||||
: downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, std::move(reservation), in);
|
||||
|
||||
return part_type == "InMemory" ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, std::move(reservation), in)
|
||||
: downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, std::move(reservation), in);
|
||||
}
|
||||
|
||||
MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
|
||||
const String & part_name,
|
||||
const UUID & part_uuid,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
ReservationPtr reservation,
|
||||
PooledReadWriteBufferFromHTTP & in)
|
||||
@ -348,6 +358,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
|
||||
MergeTreeData::MutableDataPartPtr new_data_part =
|
||||
std::make_shared<MergeTreeDataPartInMemory>(data, part_name, volume);
|
||||
|
||||
new_data_part->uuid = part_uuid;
|
||||
new_data_part->is_temp = true;
|
||||
new_data_part->setColumns(block.getNamesAndTypesList());
|
||||
new_data_part->minmax_idx.update(block, data.minmax_idx_columns);
|
||||
|
@ -32,7 +32,7 @@ public:
|
||||
private:
|
||||
MergeTreeData::DataPartPtr findPart(const String & name);
|
||||
void sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out);
|
||||
void sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_default_compression_file);
|
||||
void sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version);
|
||||
|
||||
private:
|
||||
/// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish,
|
||||
@ -80,6 +80,7 @@ private:
|
||||
|
||||
MergeTreeData::MutableDataPartPtr downloadPartToMemory(
|
||||
const String & part_name,
|
||||
const UUID & part_uuid,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
ReservationPtr reservation,
|
||||
PooledReadWriteBufferFromHTTP & in);
|
||||
|
@ -410,6 +410,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
|
||||
/// Motivation: memory for index is shared between queries - not belong to the query itself.
|
||||
MemoryTracker::BlockerInThread temporarily_disable_memory_tracker;
|
||||
|
||||
loadUUID();
|
||||
loadColumns(require_columns_checksums);
|
||||
loadChecksums(require_columns_checksums);
|
||||
loadIndexGranularity();
|
||||
@ -482,6 +483,7 @@ NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const
|
||||
|
||||
NameSet result = {"checksums.txt", "columns.txt"};
|
||||
String default_codec_path = getFullRelativePath() + DEFAULT_COMPRESSION_CODEC_FILE_NAME;
|
||||
|
||||
if (volume->getDisk()->exists(default_codec_path))
|
||||
result.emplace(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
|
||||
|
||||
@ -726,6 +728,19 @@ void IMergeTreeDataPart::loadTTLInfos()
|
||||
}
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::loadUUID()
|
||||
{
|
||||
String path = getFullRelativePath() + UUID_FILE_NAME;
|
||||
|
||||
if (volume->getDisk()->exists(path))
|
||||
{
|
||||
auto in = openForReading(volume->getDisk(), path);
|
||||
readText(uuid, *in);
|
||||
if (uuid == UUIDHelpers::Nil)
|
||||
throw Exception("Unexpected empty " + String(UUID_FILE_NAME) + " in part: " + name, ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::loadColumns(bool require)
|
||||
{
|
||||
String path = getFullRelativePath() + "columns.txt";
|
||||
|
@ -164,6 +164,11 @@ public:
|
||||
String name;
|
||||
MergeTreePartInfo info;
|
||||
|
||||
/// Part unique identifier.
|
||||
/// The intention is to use it for identifying cases where the same part is
|
||||
/// processed by multiple shards.
|
||||
UUID uuid = UUIDHelpers::Nil;
|
||||
|
||||
VolumePtr volume;
|
||||
|
||||
/// A directory path (relative to storage's path) where part data is actually stored
|
||||
@ -348,6 +353,8 @@ public:
|
||||
|
||||
static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME = "delete-on-destroy.txt";
|
||||
|
||||
static inline constexpr auto UUID_FILE_NAME = "uuid.txt";
|
||||
|
||||
/// Checks that all TTLs (table min/max, column ttls, so on) for part
|
||||
/// calculated. Part without calculated TTL may exist if TTL was added after
|
||||
/// part creation (using alter query with materialize_ttl setting).
|
||||
@ -384,6 +391,9 @@ private:
|
||||
/// In compact parts order of columns is necessary
|
||||
NameToPosition column_name_to_position;
|
||||
|
||||
/// Reads part unique identifier (if exists) from uuid.txt
|
||||
void loadUUID();
|
||||
|
||||
/// Reads columns names and types from columns.txt
|
||||
void loadColumns(bool require);
|
||||
|
||||
|
@ -134,7 +134,7 @@ MergeTreeData::MergeTreeData(
|
||||
bool attach,
|
||||
BrokenPartCallback broken_part_callback_)
|
||||
: IStorage(table_id_)
|
||||
, global_context(context_)
|
||||
, global_context(context_.getGlobalContext())
|
||||
, merging_params(merging_params_)
|
||||
, require_part_metadata(require_part_metadata_)
|
||||
, relative_data_path(relative_data_path_)
|
||||
@ -2003,7 +2003,7 @@ bool MergeTreeData::renameTempPartAndReplace(
|
||||
if (part_in_memory && getSettings()->in_memory_parts_enable_wal)
|
||||
{
|
||||
auto wal = getWriteAheadLog();
|
||||
wal->addPart(part_in_memory->block, part_in_memory->name);
|
||||
wal->addPart(part_in_memory);
|
||||
}
|
||||
|
||||
if (out_covered_parts)
|
||||
|
@ -732,6 +732,7 @@ protected:
|
||||
friend class MergeTreeDataMergerMutator;
|
||||
friend struct ReplicatedMergeTreeTableMetadata;
|
||||
friend class StorageReplicatedMergeTree;
|
||||
friend class MergeTreeDataWriter;
|
||||
|
||||
bool require_part_metadata;
|
||||
|
||||
|
@ -681,6 +681,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
single_disk_volume,
|
||||
TMP_PREFIX + future_part.name);
|
||||
|
||||
new_data_part->uuid = future_part.uuid;
|
||||
new_data_part->setColumns(storage_columns);
|
||||
new_data_part->partition.assign(future_part.getPartition());
|
||||
new_data_part->is_temp = true;
|
||||
@ -1137,6 +1138,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
|
||||
auto new_data_part = data.createPart(
|
||||
future_part.name, future_part.type, future_part.part_info, single_disk_volume, "tmp_mut_" + future_part.name);
|
||||
|
||||
new_data_part->uuid = future_part.uuid;
|
||||
new_data_part->is_temp = true;
|
||||
new_data_part->ttl_infos = source_part->ttl_infos;
|
||||
|
||||
@ -1818,6 +1820,16 @@ void MergeTreeDataMergerMutator::finalizeMutatedPart(
|
||||
const CompressionCodecPtr & codec)
|
||||
{
|
||||
auto disk = new_data_part->volume->getDisk();
|
||||
|
||||
if (new_data_part->uuid != UUIDHelpers::Nil)
|
||||
{
|
||||
auto out = disk->writeFile(new_data_part->getFullRelativePath() + IMergeTreeDataPart::UUID_FILE_NAME, 4096);
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
writeUUIDText(new_data_part->uuid, out_hashing);
|
||||
new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count();
|
||||
new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash();
|
||||
}
|
||||
|
||||
if (need_remove_expired_values)
|
||||
{
|
||||
/// Write a file with ttl infos in json format.
|
||||
|
@ -19,6 +19,7 @@ class MergeProgressCallback;
|
||||
struct FutureMergedMutatedPart
|
||||
{
|
||||
String name;
|
||||
UUID uuid = UUIDHelpers::Nil;
|
||||
String path;
|
||||
MergeTreeDataPartType type;
|
||||
MergeTreePartInfo part_info;
|
||||
|
@ -74,6 +74,7 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri
|
||||
auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count);
|
||||
auto new_data_part = storage.createPart(name, new_type, info, volume, new_relative_path);
|
||||
|
||||
new_data_part->uuid = uuid;
|
||||
new_data_part->setColumns(columns);
|
||||
new_data_part->partition.value.assign(partition.value);
|
||||
new_data_part->minmax_idx = minmax_idx;
|
||||
|
@ -247,6 +247,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
|
||||
createVolumeFromReservation(reservation, volume),
|
||||
TMP_PREFIX + part_name);
|
||||
|
||||
if (data.storage_settings.get()->assign_part_uuids)
|
||||
new_data_part->uuid = UUIDHelpers::generateV4();
|
||||
|
||||
new_data_part->setColumns(columns);
|
||||
new_data_part->partition = std::move(partition);
|
||||
new_data_part->minmax_idx = std::move(minmax_idx);
|
||||
|
@ -107,6 +107,7 @@ struct Settings;
|
||||
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
|
||||
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
|
||||
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
|
||||
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
|
||||
\
|
||||
/** Settings for testing purposes */ \
|
||||
M(Bool, randomize_part_type, false, "For testing purposes only. Randomizes part type between wide and compact", 0) \
|
||||
|
@ -2,8 +2,13 @@
|
||||
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
||||
#include <IO/MemoryReadWriteBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Poco/File.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <Poco/JSON/JSON.h>
|
||||
#include <Poco/JSON/Object.h>
|
||||
#include <Poco/JSON/Stringifier.h>
|
||||
#include <Poco/JSON/Parser.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
namespace DB
|
||||
@ -56,18 +61,23 @@ void MergeTreeWriteAheadLog::init()
|
||||
bytes_at_last_sync = 0;
|
||||
}
|
||||
|
||||
void MergeTreeWriteAheadLog::addPart(const Block & block, const String & part_name)
|
||||
void MergeTreeWriteAheadLog::addPart(DataPartInMemoryPtr & part)
|
||||
{
|
||||
std::unique_lock lock(write_mutex);
|
||||
|
||||
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
|
||||
auto part_info = MergeTreePartInfo::fromPartName(part->name, storage.format_version);
|
||||
min_block_number = std::min(min_block_number, part_info.min_block);
|
||||
max_block_number = std::max(max_block_number, part_info.max_block);
|
||||
|
||||
writeIntBinary(WAL_VERSION, *out);
|
||||
|
||||
ActionMetadata metadata{};
|
||||
metadata.part_uuid = part->uuid;
|
||||
metadata.write(*out);
|
||||
|
||||
writeIntBinary(static_cast<UInt8>(ActionType::ADD_PART), *out);
|
||||
writeStringBinary(part_name, *out);
|
||||
block_out->write(block);
|
||||
writeStringBinary(part->name, *out);
|
||||
block_out->write(part->block);
|
||||
block_out->flush();
|
||||
sync(lock);
|
||||
|
||||
@ -81,6 +91,10 @@ void MergeTreeWriteAheadLog::dropPart(const String & part_name)
|
||||
std::unique_lock lock(write_mutex);
|
||||
|
||||
writeIntBinary(WAL_VERSION, *out);
|
||||
|
||||
ActionMetadata metadata{};
|
||||
metadata.write(*out);
|
||||
|
||||
writeIntBinary(static_cast<UInt8>(ActionType::DROP_PART), *out);
|
||||
writeStringBinary(part_name, *out);
|
||||
out->next();
|
||||
@ -143,6 +157,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor
|
||||
single_disk_volume,
|
||||
part_name);
|
||||
|
||||
part->uuid = metadata.part_uuid;
|
||||
|
||||
block = block_in.read();
|
||||
}
|
||||
else
|
||||
@ -237,6 +253,29 @@ MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(const String & filename)
|
||||
return std::make_pair(min_block, max_block);
|
||||
}
|
||||
|
||||
String MergeTreeWriteAheadLog::ActionMetadata::toJSON() const
|
||||
{
|
||||
Poco::JSON::Object json;
|
||||
|
||||
if (part_uuid != UUIDHelpers::Nil)
|
||||
json.set(JSON_KEY_PART_UUID, toString(part_uuid));
|
||||
|
||||
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
oss.exceptions(std::ios::failbit);
|
||||
json.stringify(oss);
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
void MergeTreeWriteAheadLog::ActionMetadata::fromJSON(const String & buf)
|
||||
{
|
||||
Poco::JSON::Parser parser;
|
||||
auto json = parser.parse(buf).extract<Poco::JSON::Object::Ptr>();
|
||||
|
||||
if (json->has(JSON_KEY_PART_UUID))
|
||||
part_uuid = parseFromString<UUID>(json->getValue<std::string>(JSON_KEY_PART_UUID));
|
||||
}
|
||||
|
||||
void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in)
|
||||
{
|
||||
readIntBinary(min_compatible_version, meta_in);
|
||||
@ -247,19 +286,23 @@ void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in)
|
||||
size_t metadata_size;
|
||||
readVarUInt(metadata_size, meta_in);
|
||||
|
||||
UInt32 metadata_start = meta_in.offset();
|
||||
if (metadata_size == 0)
|
||||
return;
|
||||
|
||||
/// For the future: read metadata here.
|
||||
String buf(metadata_size, ' ');
|
||||
meta_in.readStrict(buf.data(), metadata_size);
|
||||
|
||||
|
||||
/// Skip extra fields if any. If min_compatible_version is lower than WAL_VERSION it means
|
||||
/// that the fields are not critical for the correctness.
|
||||
meta_in.ignore(metadata_size - (meta_in.offset() - metadata_start));
|
||||
fromJSON(buf);
|
||||
}
|
||||
|
||||
void MergeTreeWriteAheadLog::ActionMetadata::write(WriteBuffer & meta_out) const
|
||||
{
|
||||
writeIntBinary(min_compatible_version, meta_out);
|
||||
writeVarUInt(static_cast<UInt32>(0), meta_out);
|
||||
|
||||
String ser_meta = toJSON();
|
||||
|
||||
writeVarUInt(static_cast<UInt32>(ser_meta.length()), meta_out);
|
||||
writeString(ser_meta, meta_out);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,9 +2,10 @@
|
||||
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <DataStreams/NativeBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -36,11 +37,20 @@ public:
|
||||
/// The same approach can be used recursively inside metadata.
|
||||
UInt8 min_compatible_version = 0;
|
||||
|
||||
/// Actual metadata.
|
||||
UUID part_uuid = UUIDHelpers::Nil;
|
||||
|
||||
void write(WriteBuffer & meta_out) const;
|
||||
void read(ReadBuffer & meta_in);
|
||||
|
||||
private:
|
||||
static constexpr auto JSON_KEY_PART_UUID = "part_uuid";
|
||||
|
||||
String toJSON() const;
|
||||
void fromJSON(const String & buf);
|
||||
};
|
||||
|
||||
constexpr static UInt8 WAL_VERSION = 0;
|
||||
constexpr static UInt8 WAL_VERSION = 1;
|
||||
constexpr static auto WAL_FILE_NAME = "wal";
|
||||
constexpr static auto WAL_FILE_EXTENSION = ".bin";
|
||||
constexpr static auto DEFAULT_WAL_FILE_NAME = "wal.bin";
|
||||
@ -50,7 +60,7 @@ public:
|
||||
|
||||
~MergeTreeWriteAheadLog();
|
||||
|
||||
void addPart(const Block & block, const String & part_name);
|
||||
void addPart(DataPartInMemoryPtr & part);
|
||||
void dropPart(const String & part_name);
|
||||
std::vector<MergeTreeMutableDataPartPtr> restore(const StorageMetadataPtr & metadata_snapshot);
|
||||
|
||||
|
@ -133,6 +133,18 @@ void MergedBlockOutputStream::finalizePartOnDisk(
|
||||
MergeTreeData::DataPart::Checksums & checksums,
|
||||
bool sync)
|
||||
{
|
||||
if (new_part->uuid != UUIDHelpers::Nil)
|
||||
{
|
||||
auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::UUID_FILE_NAME, 4096);
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
writeUUIDText(new_part->uuid, out_hashing);
|
||||
checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count();
|
||||
checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash();
|
||||
out->finalize();
|
||||
if (sync)
|
||||
out->sync();
|
||||
}
|
||||
|
||||
if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part))
|
||||
{
|
||||
new_part->partition.store(storage, volume->getDisk(), part_path, checksums);
|
||||
|
@ -264,6 +264,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(
|
||||
log_entry.create_time = time(nullptr);
|
||||
log_entry.source_replica = storage.replica_name;
|
||||
log_entry.new_part_name = part->name;
|
||||
/// TODO maybe add UUID here as well?
|
||||
log_entry.quorum = quorum;
|
||||
log_entry.block_id = block_id;
|
||||
log_entry.new_part_type = part->getType();
|
||||
|
@ -19,7 +19,14 @@ namespace ErrorCodes
|
||||
|
||||
void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
|
||||
{
|
||||
out << "format version: 4\n"
|
||||
UInt8 format_version = 4;
|
||||
|
||||
/// Conditionally bump format_version only when uuid has been assigned.
|
||||
/// If some other feature requires bumping format_version to >= 5 then this code becomes no-op.
|
||||
if (new_part_uuid != UUIDHelpers::Nil)
|
||||
format_version = std::max(format_version, static_cast<UInt8>(5));
|
||||
|
||||
out << "format version: " << format_version << "\n"
|
||||
<< "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n"
|
||||
<< "source replica: " << source_replica << '\n'
|
||||
<< "block_id: " << escape << block_id << '\n';
|
||||
@ -36,8 +43,13 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
|
||||
out << s << '\n';
|
||||
out << "into\n" << new_part_name;
|
||||
out << "\ndeduplicate: " << deduplicate;
|
||||
|
||||
if (merge_type != MergeType::REGULAR)
|
||||
out <<"\nmerge_type: " << static_cast<UInt64>(merge_type);
|
||||
|
||||
if (new_part_uuid != UUIDHelpers::Nil)
|
||||
out << "\ninto_uuid: " << new_part_uuid;
|
||||
|
||||
break;
|
||||
|
||||
case DROP_RANGE:
|
||||
@ -75,6 +87,10 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
|
||||
<< "to\n"
|
||||
<< new_part_name;
|
||||
|
||||
if (new_part_uuid != UUIDHelpers::Nil)
|
||||
out << "\nto_uuid\n"
|
||||
<< new_part_uuid;
|
||||
|
||||
if (isAlterMutation())
|
||||
out << "\nalter_version\n" << alter_version;
|
||||
break;
|
||||
@ -113,7 +129,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
|
||||
|
||||
in >> "format version: " >> format_version >> "\n";
|
||||
|
||||
if (format_version < 1 || format_version > 4)
|
||||
if (format_version < 1 || format_version > 5)
|
||||
throw Exception("Unknown ReplicatedMergeTreeLogEntry format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT_VERSION);
|
||||
|
||||
if (format_version >= 2)
|
||||
@ -156,15 +172,21 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
|
||||
in >> "\ndeduplicate: " >> deduplicate;
|
||||
|
||||
/// Trying to be more backward compatible
|
||||
in >> "\n";
|
||||
if (checkString("merge_type: ", in))
|
||||
while (!trailing_newline_found)
|
||||
{
|
||||
UInt64 value;
|
||||
in >> value;
|
||||
merge_type = checkAndGetMergeType(value);
|
||||
in >> "\n";
|
||||
|
||||
if (checkString("merge_type: ", in))
|
||||
{
|
||||
UInt64 value;
|
||||
in >> value;
|
||||
merge_type = checkAndGetMergeType(value);
|
||||
}
|
||||
else if (checkString("into_uuid: ", in))
|
||||
in >> new_part_uuid;
|
||||
else
|
||||
trailing_newline_found = true;
|
||||
}
|
||||
else
|
||||
trailing_newline_found = true;
|
||||
}
|
||||
}
|
||||
else if (type_str == "drop" || type_str == "detach")
|
||||
@ -198,12 +220,17 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
|
||||
>> new_part_name;
|
||||
source_parts.push_back(source_part);
|
||||
|
||||
in >> "\n";
|
||||
while (!trailing_newline_found)
|
||||
{
|
||||
in >> "\n";
|
||||
|
||||
if (in.eof())
|
||||
trailing_newline_found = true;
|
||||
else if (checkString("alter_version\n", in))
|
||||
in >> alter_version;
|
||||
if (checkString("alter_version\n", in))
|
||||
in >> alter_version;
|
||||
else if (checkString("to_uuid\n", in))
|
||||
in >> new_part_uuid;
|
||||
else
|
||||
trailing_newline_found = true;
|
||||
}
|
||||
}
|
||||
else if (type_str == "alter")
|
||||
{
|
||||
|
@ -77,6 +77,7 @@ struct ReplicatedMergeTreeLogEntryData
MergeTreeDataPartType new_part_type;
String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/).
mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'.
UUID new_part_uuid = UUIDHelpers::Nil;

Strings source_parts;
bool deduplicate = false; /// Do deduplicate on merge
@ -1,6 +1,8 @@
#include <Formats/FormatFactory.h>
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <Storages/RabbitMQ/RabbitMQBlockInputStream.h>

#include <Formats/FormatFactory.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h>

namespace ErrorCodes
@ -1,7 +1,6 @@
#pragma once

#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/Context.h>
#include <Storages/RabbitMQ/StorageRabbitMQ.h>
#include <Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h>

@ -38,7 +37,7 @@ public:
private:
StorageRabbitMQ & storage;
StorageMetadataPtr metadata_snapshot;
Context context;
const Context & context;
Names column_names;
const size_t max_block_size;
bool ack_in_suffix;
@ -1,7 +1,6 @@
#pragma once

#include <DataStreams/IBlockOutputStream.h>
#include <Interpreters/Context.h>
#include <Storages/RabbitMQ/StorageRabbitMQ.h>

@ -23,7 +22,7 @@ public:
private:
StorageRabbitMQ & storage;
StorageMetadataPtr metadata_snapshot;
Context context;
const Context & context;
ProducerBufferPtr buffer;
BlockOutputStreamPtr child;
};
@ -69,7 +69,7 @@ namespace ExchangeType

StorageRabbitMQ::StorageRabbitMQ(
const StorageID & table_id_,
Context & context_,
const Context & context_,
const ColumnsDescription & columns_,
std::unique_ptr<RabbitMQSettings> rabbitmq_settings_)
: IStorage(table_id_)
@ -67,12 +67,12 @@ public:
protected:
StorageRabbitMQ(
const StorageID & table_id_,
Context & context_,
const Context & context_,
const ColumnsDescription & columns_,
std::unique_ptr<RabbitMQSettings> rabbitmq_settings_);

private:
Context global_context;
const Context & global_context;
Context rabbitmq_context;
std::unique_ptr<RabbitMQSettings> rabbitmq_settings;
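
A recurring change across the RabbitMQ (and, below, Buffer) storage hunks is replacing owned `Context` members and mutable `Context &` parameters with `const Context &`, so each storage references the long-lived global context instead of copying it. A minimal sketch of that pattern follows; the `Context` struct and `StorageSketch` class are hypothetical stand-ins, not ClickHouse's types.

    #include <iostream>
    #include <string>

    // Hypothetical stand-in for DB::Context; the real class is far larger.
    struct Context
    {
        std::string settings_profile = "default";
    };

    // Before: the storage held its own copy (Context global_context;).
    // After:  it keeps a const reference, as in the hunks above.
    class StorageSketch
    {
    public:
        explicit StorageSketch(const Context & context_)
            : global_context(context_)   // bind, don't copy
        {
        }

        void report() const
        {
            std::cout << "profile: " << global_context.settings_profile << "\n";
        }

    private:
        const Context & global_context;  // the storage must not outlive this context
    };

    int main()
    {
        Context global;                  // owned by the "server", outlives storages
        StorageSketch storage(global);
        global.settings_profile = "readonly";
        storage.report();                // sees the update: prints "profile: readonly"
        return 0;
    }

The trade-off is the usual one for reference members: no copy and no accidental mutation of a private copy, at the cost of a lifetime requirement on the referenced context.
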
@ -1,7 +1,9 @@
#include <Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h>
#include "Core/Block.h"
#include "Columns/ColumnString.h"
#include "Columns/ColumnsNumber.h"

#include <Core/Block.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/Context.h>
#include <common/logger_useful.h>
#include <amqpcpp.h>
#include <uv.h>
@ -25,7 +27,7 @@ namespace ErrorCodes

WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
std::pair<String, UInt16> & parsed_address_,
Context & global_context,
const Context & global_context,
const std::pair<String, String> & login_password_,
const Names & routing_keys_,
const String & exchange_name_,
@ -9,7 +9,7 @@
#include <Storages/RabbitMQ/RabbitMQHandler.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Core/BackgroundSchedulePool.h>
#include <Interpreters/Context.h>
#include <Core/Names.h>

namespace DB
{
@ -19,7 +19,7 @@ class WriteBufferToRabbitMQProducer : public WriteBuffer
public:
WriteBufferToRabbitMQProducer(
std::pair<String, UInt16> & parsed_address_,
Context & global_context,
const Context & global_context,
const std::pair<String, String> & login_password_,
const Names & routing_keys_,
const String & exchange_name_,
@ -22,7 +22,7 @@
#include <common/logger_useful.h>
#include <common/getThreadId.h>
#include <ext/range.h>
#include <Processors/QueryPlan/ConvertingStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Sources/SourceFromInputStream.h>
@ -65,14 +65,14 @@ StorageBuffer::StorageBuffer(
const StorageID & table_id_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
Context & context_,
const Context & context_,
size_t num_shards_,
const Thresholds & min_thresholds_,
const Thresholds & max_thresholds_,
const StorageID & destination_id_,
bool allow_materialized_)
: IStorage(table_id_)
, global_context(context_)
, global_context(context_.getGlobalContext())
, num_shards(num_shards_), buffers(num_shards_)
, min_thresholds(min_thresholds_)
, max_thresholds(max_thresholds_)
@ -248,9 +248,12 @@ void StorageBuffer::read(
adding_missed->setStepDescription("Add columns missing in destination table");
query_plan.addStep(std::move(adding_missed));

auto converting = std::make_unique<ConvertingStep>(
query_plan.getCurrentDataStream(),
header);
auto actions_dag = ActionsDAG::makeConvertingActions(
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name);

auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), actions_dag);

converting->setStepDescription("Convert destination table columns to Buffer table structure");
query_plan.addStep(std::move(converting));
@ -339,7 +342,12 @@ void StorageBuffer::read(
/// Convert structure from table to structure from buffer.
if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
{
auto converting = std::make_unique<ConvertingStep>(query_plan.getCurrentDataStream(), result_header);
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
result_header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name);

auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), convert_actions_dag);
query_plan.addStep(std::move(converting));
}
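
The last two StorageBuffer hunks replace the dedicated `ConvertingStep` with an `ExpressionStep` built from `ActionsDAG::makeConvertingActions(..., MatchColumnsMode::Name)`: the conversion between the destination table's header and the Buffer header is expressed as ordinary expression actions that match columns by name. The standalone sketch below illustrates only that by-name matching idea with plain standard containers; the `Column`/`Block` types and the helper are hypothetical, not ClickHouse's API.

    #include <iostream>
    #include <stdexcept>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Hypothetical minimal "column": a name plus its values.
    struct Column
    {
        std::string name;
        std::vector<int> values;
    };

    using Block = std::vector<Column>;

    // Reorder (and restrict) the source block so it matches the target header
    // by column name — the essence of MatchColumnsMode::Name in the hunks above.
    Block convertByName(const Block & source, const std::vector<std::string> & target_header)
    {
        std::unordered_map<std::string, const Column *> by_name;
        for (const auto & col : source)
            by_name.emplace(col.name, &col);

        Block result;
        result.reserve(target_header.size());
        for (const auto & name : target_header)
        {
            auto it = by_name.find(name);
            if (it == by_name.end())
                throw std::runtime_error("Missing column: " + name);
            result.push_back(*it->second);
        }
        return result;
    }

    int main()
    {
        // The destination table returns columns in its own order, with extras.
        Block destination_block = {{"b", {4, 5}}, {"a", {1, 2}}, {"extra", {0, 0}}};

        // Convert to the Buffer table's header order: just "a", then "b".
        Block converted = convertByName(destination_block, {"a", "b"});
        for (const auto & col : converted)
            std::cout << col.name << " ";   // prints: a b
        std::cout << "\n";
        return 0;
    }

In the real change the matching additionally inserts type casts where column types differ; this sketch only covers the reordering/selection by name.
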
Some files were not shown because too many files have changed in this diff.