mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into vectorize-sum
This commit is contained in:
commit
6669607e17
12
.arcignore
Normal file
12
.arcignore
Normal file
@ -0,0 +1,12 @@
|
||||
# .arcignore is the same as .gitignore but for Arc VCS.
|
||||
# Arc VCS is a proprietary VCS in Yandex that is very similar to Git
|
||||
# from the user perspective but with the following differences:
|
||||
# 1. Data is stored in distributed object storage.
|
||||
# 2. Local copy works via FUSE without downloading all the objects.
|
||||
# For this reason, it is better suited for huge monorepositories that can be found in large companies (e.g. Yandex, Google).
|
||||
# As ClickHouse developers, we don't use Arc as a VCS (we use Git).
|
||||
# But the ClickHouse source code is also mirrored into internal monorepository and our collegues are using Arc.
|
||||
# You can read more about Arc here: https://habr.com/en/company/yandex/blog/482926/
|
||||
|
||||
# Repository is synchronized without 3rd-party submodules.
|
||||
contrib
|
@ -385,9 +385,6 @@ if (OS_LINUX AND NOT ENABLE_JEMALLOC)
|
||||
endif ()
|
||||
|
||||
if (USE_OPENCL)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_OPENCL=1")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_OPENCL=1")
|
||||
|
||||
if (OS_DARWIN)
|
||||
set(OPENCL_LINKER_FLAGS "-framework OpenCL")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENCL_LINKER_FLAGS}")
|
||||
|
@ -1,13 +1,19 @@
|
||||
# TODO: enable by default
|
||||
if(0)
|
||||
option(ENABLE_OPENCL "Enable OpenCL support" ${ENABLE_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_OPENCL)
|
||||
|
||||
# Intel OpenCl driver: sudo apt install intel-opencl-icd
|
||||
# TODO It's possible to add it as submodules: https://github.com/intel/compute-runtime/releases
|
||||
# @sa https://github.com/intel/compute-runtime/releases
|
||||
|
||||
# OpenCL applications should link wiht ICD loader
|
||||
# sudo apt install opencl-headers ocl-icd-libopencl1
|
||||
# sudo ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so
|
||||
# TODO: add https://github.com/OCL-dev/ocl-icd as submodule instead
|
||||
|
||||
find_package(OpenCL REQUIRED)
|
||||
find_package(OpenCL)
|
||||
if(OpenCL_FOUND)
|
||||
set(USE_OPENCL 1)
|
||||
endif()
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
services:
|
||||
hdfs1:
|
||||
image: sequenceiq/hadoop-docker:2.7.0
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
|
||||
services:
|
||||
kafka_zookeeper:
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
|
||||
services:
|
||||
minio1:
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
services:
|
||||
mongo1:
|
||||
image: mongo:3.6
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
services:
|
||||
mysql1:
|
||||
image: mysql:5.7
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
services:
|
||||
postgres1:
|
||||
image: postgres
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
services:
|
||||
redis1:
|
||||
image: redis
|
||||
|
@ -1,25 +1,47 @@
|
||||
version: '2.2'
|
||||
version: '2.3'
|
||||
services:
|
||||
zoo1:
|
||||
image: zookeeper:3.4.12
|
||||
restart: always
|
||||
environment:
|
||||
ZOO_TICK_TIME: 500
|
||||
ZOO_MY_ID: 1
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
|
||||
|
||||
ZOO_MY_ID: 1
|
||||
JVMFLAGS: -Dzookeeper.forceSync=no
|
||||
volumes:
|
||||
- type: ${ZK_FS:-tmpfs}
|
||||
source: ${ZK_DATA1:-}
|
||||
target: /data
|
||||
- type: ${ZK_FS:-tmpfs}
|
||||
source: ${ZK_DATA_LOG1:-}
|
||||
target: /datalog
|
||||
zoo2:
|
||||
image: zookeeper:3.4.12
|
||||
restart: always
|
||||
environment:
|
||||
ZOO_TICK_TIME: 500
|
||||
ZOO_MY_ID: 2
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
|
||||
|
||||
ZOO_MY_ID: 2
|
||||
JVMFLAGS: -Dzookeeper.forceSync=no
|
||||
volumes:
|
||||
- type: ${ZK_FS:-tmpfs}
|
||||
source: ${ZK_DATA2:-}
|
||||
target: /data
|
||||
- type: ${ZK_FS:-tmpfs}
|
||||
source: ${ZK_DATA_LOG2:-}
|
||||
target: /datalog
|
||||
zoo3:
|
||||
image: zookeeper:3.4.12
|
||||
restart: always
|
||||
environment:
|
||||
ZOO_TICK_TIME: 500
|
||||
ZOO_MY_ID: 3
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
|
||||
ZOO_MY_ID: 3
|
||||
JVMFLAGS: -Dzookeeper.forceSync=no
|
||||
volumes:
|
||||
- type: ${ZK_FS:-tmpfs}
|
||||
source: ${ZK_DATA3:-}
|
||||
target: /data
|
||||
- type: ${ZK_FS:-tmpfs}
|
||||
source: ${ZK_DATA_LOG3:-}
|
||||
target: /datalog
|
||||
|
@ -27,7 +27,7 @@ function configure
|
||||
kill -0 $left_pid
|
||||
disown $left_pid
|
||||
set +m
|
||||
while ! clickhouse-client --port 9001 --query "select 1" ; do kill -0 $left_pid ; echo . ; sleep 1 ; done
|
||||
while ! clickhouse-client --port 9001 --query "select 1" && kill -0 $left_pid ; do echo . ; sleep 1 ; done
|
||||
echo server for setup started
|
||||
|
||||
clickhouse-client --port 9001 --query "create database test" ||:
|
||||
@ -71,9 +71,9 @@ function restart
|
||||
|
||||
set +m
|
||||
|
||||
while ! clickhouse-client --port 9001 --query "select 1" ; do kill -0 $left_pid ; echo . ; sleep 1 ; done
|
||||
while ! clickhouse-client --port 9001 --query "select 1" && kill -0 $left_pid ; do echo . ; sleep 1 ; done
|
||||
echo left ok
|
||||
while ! clickhouse-client --port 9002 --query "select 1" ; do kill -0 $right_pid ; echo . ; sleep 1 ; done
|
||||
while ! clickhouse-client --port 9002 --query "select 1" && kill -0 $right_pid ; do echo . ; sleep 1 ; done
|
||||
echo right ok
|
||||
|
||||
clickhouse-client --port 9001 --query "select * from system.tables where database != 'system'"
|
||||
@ -133,7 +133,7 @@ function run_tests
|
||||
fi
|
||||
|
||||
# Delete old report files.
|
||||
for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
|
||||
for x in {test-times,wall-clock-times}.tsv
|
||||
do
|
||||
rm -v "$x" ||:
|
||||
touch "$x"
|
||||
@ -220,66 +220,127 @@ function get_profiles
|
||||
# Build and analyze randomization distribution for all queries.
|
||||
function analyze_queries
|
||||
{
|
||||
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||:
|
||||
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
|
||||
rm -rfv analyze ||:
|
||||
mkdir analyze ||:
|
||||
|
||||
# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
|
||||
# absolutely atrocious way. This should be done by the file() function itself.
|
||||
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
|
||||
do
|
||||
paste -d' ' \
|
||||
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
|
||||
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
|
||||
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
|
||||
done
|
||||
|
||||
# Split the raw test output into files suitable for analysis.
|
||||
IFS=$'\n'
|
||||
for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print)
|
||||
do
|
||||
test_name=$(basename "$test_file" "-raw.tsv")
|
||||
sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv"
|
||||
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv"
|
||||
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv"
|
||||
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv"
|
||||
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
|
||||
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv"
|
||||
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv"
|
||||
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv"
|
||||
sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv"
|
||||
done
|
||||
unset IFS
|
||||
|
||||
# for each query run, prepare array of metrics from query log
|
||||
clickhouse-local --query "
|
||||
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
|
||||
'test text, query_index int, query_id text, version UInt8, time float');
|
||||
|
||||
create view left_query_log as select *
|
||||
from file('left-query-log.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "left-query-log.tsv.columns")');
|
||||
|
||||
create view right_query_log as select *
|
||||
from file('right-query-log.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "right-query-log.tsv.columns")');
|
||||
|
||||
create table query_metrics engine File(TSV, -- do not add header -- will parse with grep
|
||||
'analyze/query-run-metrics.tsv')
|
||||
as select
|
||||
test, query_index, 0 run, version,
|
||||
[
|
||||
-- server-reported time
|
||||
query_duration_ms / toFloat64(1000)
|
||||
, toFloat64(memory_usage)
|
||||
-- client-reported time
|
||||
, query_runs.time
|
||||
] metrics
|
||||
from (
|
||||
select *, 0 version from left_query_log
|
||||
union all
|
||||
select *, 1 version from right_query_log
|
||||
) query_logs
|
||||
right join query_runs
|
||||
using (query_id, version)
|
||||
;
|
||||
"
|
||||
|
||||
# This is a lateral join in bash... please forgive me.
|
||||
# We don't have arrayPermute(), so I have to make random permutations with
|
||||
# We don't have arrayPermute(), so I have to make random permutations with
|
||||
# `order by rand`, and it becomes really slow if I do it for more than one
|
||||
# query. We also don't have lateral joins. So I just put all runs of each
|
||||
# query into a separate file, and then compute randomization distribution
|
||||
# for each file. I do this in parallel using GNU parallel.
|
||||
query_index=1
|
||||
IFS=$'\n'
|
||||
for test_file in $(find . -maxdepth 1 -name "*-queries.tsv" -print)
|
||||
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
|
||||
do
|
||||
test_name=$(basename "$test_file" "-queries.tsv")
|
||||
query_index=1
|
||||
for query in $(cut -d' ' -f1 "$test_file" | sort | uniq)
|
||||
do
|
||||
query_prefix="$test_name.q$query_index"
|
||||
query_index=$((query_index + 1))
|
||||
grep -F "$query " "$test_file" > "$query_prefix.tmp"
|
||||
printf "%s\0\n" \
|
||||
"clickhouse-local \
|
||||
--file \"$query_prefix.tmp\" \
|
||||
--structure 'query text, run int, version UInt32, time float' \
|
||||
--query \"$(cat "$script_dir/eqmed.sql")\" \
|
||||
>> \"$test_name-report.tsv\"" \
|
||||
2>> analyze-errors.log \
|
||||
>> analyze-commands.txt
|
||||
done
|
||||
file="analyze/q$query_index.tmp"
|
||||
grep -F "$prefix " "analyze/query-run-metrics.tsv" > "$file" &
|
||||
printf "%s\0\n" \
|
||||
"clickhouse-local \
|
||||
--file \"$file\" \
|
||||
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
|
||||
--query \"$(cat "$script_dir/eqmed.sql")\" \
|
||||
>> \"analyze/query-reports.tsv\"" \
|
||||
2>> analyze/errors.log \
|
||||
>> analyze/commands.txt
|
||||
|
||||
query_index=$((query_index + 1))
|
||||
done
|
||||
wait
|
||||
unset IFS
|
||||
|
||||
parallel --verbose --null < analyze-commands.txt
|
||||
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt
|
||||
}
|
||||
|
||||
# Analyze results
|
||||
function report
|
||||
{
|
||||
|
||||
rm -r report ||:
|
||||
mkdir report ||:
|
||||
|
||||
|
||||
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
|
||||
|
||||
cat analyze-errors.log >> report/errors.log ||:
|
||||
cat analyze/errors.log >> report/errors.log ||:
|
||||
cat profile-errors.log >> report/errors.log ||:
|
||||
|
||||
clickhouse-local --query "
|
||||
create view query_display_names as select * from
|
||||
file('analyze/query-display-names.tsv', TSV,
|
||||
'test text, query_index int, query_display_name text')
|
||||
;
|
||||
|
||||
create table query_metric_stats engine File(TSVWithNamesAndTypes,
|
||||
'report/query-metric-stats.tsv') as
|
||||
select *, metric_name
|
||||
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
|
||||
right Array(float), diff Array(float), stat_threshold Array(float),
|
||||
test text, query_index int') reports
|
||||
left array join ['server_time', 'memory', 'client_time'] as metric_name,
|
||||
left, right, diff, stat_threshold
|
||||
left join query_display_names
|
||||
on reports.test = query_display_names.test
|
||||
and reports.query_index = query_display_names.query_index
|
||||
;
|
||||
|
||||
-- Main statistics for queries -- query time as reported in query log.
|
||||
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
|
||||
as select
|
||||
-- FIXME Comparison mode doesn't make sense for queries that complete
|
||||
@ -296,34 +357,54 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
|
||||
|
||||
left, right, diff, stat_threshold,
|
||||
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
|
||||
reports.test,
|
||||
query
|
||||
from
|
||||
(
|
||||
select *,
|
||||
replaceAll(_file, '-report.tsv', '') test
|
||||
from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
|
||||
) reports
|
||||
left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
|
||||
using test
|
||||
;
|
||||
test, query_index, query_display_name
|
||||
from query_metric_stats
|
||||
left join file('analyze/report-thresholds.tsv', TSV,
|
||||
'test text, report_threshold float') thresholds
|
||||
on query_metric_stats.test = thresholds.test
|
||||
where metric_name = 'server_time'
|
||||
order by test, query_index, metric_name
|
||||
;
|
||||
|
||||
-- keep the table in old format so that we can analyze new and old data together
|
||||
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
|
||||
as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
|
||||
as select short, changed_fail, unstable_fail, left, right, diff,
|
||||
stat_threshold, test, query_display_name query
|
||||
from queries
|
||||
;
|
||||
|
||||
-- save all test runs as JSON for the new comparison page
|
||||
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
|
||||
select test, query_display_name query,
|
||||
versions_runs[1] runs_left, versions_runs[2] runs_right
|
||||
from (
|
||||
select
|
||||
test, query_index,
|
||||
groupArrayInsertAt(runs, version) versions_runs
|
||||
from (
|
||||
select
|
||||
test, query_index, version,
|
||||
groupArray(metrics[1]) runs
|
||||
from file('analyze/query-run-metrics.tsv', TSV,
|
||||
'test text, query_index int, run int, version UInt8, metrics Array(float)')
|
||||
group by test, query_index, version
|
||||
)
|
||||
group by test, query_index
|
||||
) runs
|
||||
left join query_display_names using (test, query_index)
|
||||
;
|
||||
|
||||
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
|
||||
select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
|
||||
order by abs(diff) desc;
|
||||
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
|
||||
from queries where changed_show order by abs(diff) desc;
|
||||
|
||||
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
|
||||
select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
|
||||
order by stat_threshold desc;
|
||||
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
|
||||
from queries where unstable_show order by stat_threshold desc;
|
||||
|
||||
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
|
||||
select query, test from queries where unstable_show or changed_show
|
||||
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
|
||||
'report/queries-for-flamegraph.tsv') as
|
||||
select test, query_index from queries where unstable_show or changed_show
|
||||
;
|
||||
|
||||
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
|
||||
@ -331,23 +412,23 @@ create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
|
||||
group by test having s > 0 order by s desc;
|
||||
|
||||
create table query_time engine Memory as select *
|
||||
from file('client-times.tsv', TSV, 'test text, query text, client float, server float');
|
||||
from file('analyze/client-times.tsv', TSV,
|
||||
'test text, query_index int, client float, server float');
|
||||
|
||||
create table wall_clock engine Memory as select *
|
||||
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
|
||||
|
||||
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
|
||||
select client, server, floor(client/server, 3) p, query
|
||||
from query_time where p > 1.02 order by p desc;
|
||||
select client, server, floor(client/server, 3) p, query_display_name
|
||||
from query_time left join query_display_names using (test, query_index)
|
||||
where p > 1.02 order by p desc;
|
||||
|
||||
create table test_time engine Memory as
|
||||
select test, sum(client) total_client_time,
|
||||
maxIf(client, not short) query_max,
|
||||
minIf(client, not short) query_min,
|
||||
count(*) queries,
|
||||
sum(short) short_queries
|
||||
from query_time full join queries
|
||||
using test, query
|
||||
count(*) queries, sum(short) short_queries
|
||||
from query_time full join queries using (test, query_index)
|
||||
group by test;
|
||||
|
||||
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
|
||||
@ -359,40 +440,89 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
|
||||
floor(real / queries, 3) avg_real_per_query,
|
||||
floor(query_min, 3)
|
||||
from test_time
|
||||
-- wall clock times are also measured for skipped tests, so don't
|
||||
-- do full join
|
||||
left join wall_clock using test
|
||||
-- wall clock times are also measured for skipped tests, so don't
|
||||
-- do full join
|
||||
left join wall_clock using test
|
||||
order by avg_real_per_query desc;
|
||||
|
||||
-- report for all queries page, only main metric
|
||||
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
|
||||
select changed_fail, unstable_fail,
|
||||
left, right, diff,
|
||||
floor(left > right ? left / right : right / left, 3),
|
||||
stat_threshold, test, query
|
||||
from queries order by test, query;
|
||||
stat_threshold, test, query_display_name
|
||||
from queries order by test, query_display_name;
|
||||
|
||||
-- new report for all queries with all metrics (no page yet)
|
||||
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
|
||||
select metric_name, left, right, diff,
|
||||
floor(left > right ? left / right : right / left, 3),
|
||||
stat_threshold, test, query_index, query_display_name
|
||||
from query_metric_stats
|
||||
order by test, query_index;
|
||||
" 2> >(tee -a report/errors.log 1>&2)
|
||||
|
||||
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
|
||||
do
|
||||
# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
|
||||
# absolutely atrocious way. This should be done by the file() function itself.
|
||||
paste -d' ' \
|
||||
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
|
||||
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
|
||||
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
|
||||
done
|
||||
|
||||
# Prepare source data for metrics and flamegraphs for unstable queries.
|
||||
for version in {right,left}
|
||||
do
|
||||
clickhouse-local --query "
|
||||
do
|
||||
rm -rf data
|
||||
clickhouse-local --query "
|
||||
create view queries_for_flamegraph as
|
||||
select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
|
||||
'query text, test text');
|
||||
'test text, query_index int');
|
||||
|
||||
create view query_runs as
|
||||
with 0 as left, 1 as right
|
||||
select * from file('analyze/query-runs.tsv', TSV,
|
||||
'test text, query_index int, query_id text, version UInt8, time float')
|
||||
where version = $version
|
||||
;
|
||||
|
||||
create view query_display_names as select * from
|
||||
file('analyze/query-display-names.tsv', TSV,
|
||||
'test text, query_index int, query_display_name text')
|
||||
;
|
||||
|
||||
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
|
||||
'unstable-query-runs.$version.rep') as
|
||||
select test, query_index, query_display_name, query_id
|
||||
from query_runs
|
||||
join queries_for_flamegraph on
|
||||
query_runs.test = queries_for_flamegraph.test
|
||||
and query_runs.query_index = queries_for_flamegraph.query_index
|
||||
left join query_display_names on
|
||||
query_runs.test = query_display_names.test
|
||||
and query_runs.query_index = query_display_names.query_index
|
||||
;
|
||||
|
||||
create view query_log as select *
|
||||
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "$version-query-log.tsv.columns")');
|
||||
|
||||
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-metrics.$version.rep') as
|
||||
select
|
||||
test, query_index, query_id,
|
||||
ProfileEvents.Values value, ProfileEvents.Names metric
|
||||
from query_log array join ProfileEvents
|
||||
join unstable_query_runs using (query_id)
|
||||
;
|
||||
|
||||
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-metrics-2.$version.rep') as
|
||||
select
|
||||
test, query_index, query_id,
|
||||
v, n
|
||||
from (
|
||||
select
|
||||
test, query_index, query_id,
|
||||
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
|
||||
[memory_usage, read_bytes, written_bytes, query_duration_ms] v
|
||||
from query_log
|
||||
join unstable_query_runs using (query_id)
|
||||
)
|
||||
array join v, n;
|
||||
|
||||
create view trace_log as select *
|
||||
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "$version-trace-log.tsv.columns")');
|
||||
@ -404,88 +534,64 @@ create view addresses_src as select *
|
||||
create table addresses_join_$version engine Join(any, left, address) as
|
||||
select addr address, name from addresses_src;
|
||||
|
||||
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
|
||||
'unstable-query-runs.$version.rep') as
|
||||
select query, query_id from query_log
|
||||
where query in (select query from queries_for_flamegraph)
|
||||
and query_id not like 'prewarm %'
|
||||
;
|
||||
|
||||
create table unstable_query_log engine File(Vertical,
|
||||
'unstable-query-log.$version.rep') as
|
||||
select * from query_log
|
||||
where query_id in (select query_id from unstable_query_runs);
|
||||
|
||||
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-metrics.$version.rep') as
|
||||
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
|
||||
from query_log array join ProfileEvents
|
||||
where query_id in (select query_id from unstable_query_runs)
|
||||
;
|
||||
|
||||
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-metrics-2.$version.rep') as
|
||||
select v, n, query_id, query
|
||||
from
|
||||
(select
|
||||
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
|
||||
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
|
||||
query,
|
||||
query_id
|
||||
from query_log
|
||||
where query_id in (select query_id from unstable_query_runs))
|
||||
array join n, v;
|
||||
|
||||
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-traces.$version.rep') as
|
||||
select
|
||||
test, query_index, query_id,
|
||||
count() value,
|
||||
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric,
|
||||
unstable_query_runs.query_id,
|
||||
any(unstable_query_runs.query) query
|
||||
from unstable_query_runs
|
||||
join trace_log on trace_log.query_id = unstable_query_runs.query_id
|
||||
group by unstable_query_runs.query_id, metric
|
||||
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric
|
||||
from trace_log
|
||||
join unstable_query_runs using query_id
|
||||
group by test, query_index, query_id, metric
|
||||
order by count() desc
|
||||
;
|
||||
|
||||
create table metric_devation engine File(TSVWithNamesAndTypes,
|
||||
'metric-deviation.$version.rep') as
|
||||
select query, floor((q[3] - q[1])/q[2], 3) d,
|
||||
quantilesExact(0, 0.5, 1)(value) q, metric
|
||||
from (select * from unstable_run_metrics
|
||||
union all select * from unstable_run_traces
|
||||
union all select * from unstable_run_metrics_2) mm
|
||||
join queries_for_flamegraph using query
|
||||
group by query, metric
|
||||
having d > 0.5
|
||||
order by query desc, d desc
|
||||
-- first goes the key used to split the file with grep
|
||||
select test, query_index, query_display_name,
|
||||
d, q, metric
|
||||
from (
|
||||
select
|
||||
test, query_index,
|
||||
floor((q[3] - q[1])/q[2], 3) d,
|
||||
quantilesExact(0, 0.5, 1)(value) q, metric
|
||||
from (select * from unstable_run_metrics
|
||||
union all select * from unstable_run_traces
|
||||
union all select * from unstable_run_metrics_2) mm
|
||||
group by test, query_index, metric
|
||||
having d > 0.5
|
||||
) metrics
|
||||
left join unstable_query_runs using (test, query_index)
|
||||
order by test, query_index, d desc
|
||||
;
|
||||
|
||||
create table stacks engine File(TSV, 'stacks.$version.rep') as
|
||||
select
|
||||
query,
|
||||
-- first goes the key used to split the file with grep
|
||||
test, query_index, any(query_display_name),
|
||||
arrayStringConcat(
|
||||
arrayMap(x -> joinGet(addresses_join_$version, 'name', x),
|
||||
arrayReverse(trace)
|
||||
),
|
||||
';'
|
||||
) readable_trace,
|
||||
count()
|
||||
count() c
|
||||
from trace_log
|
||||
join unstable_query_runs using query_id
|
||||
group by query, trace
|
||||
group by test, query_index, trace
|
||||
;
|
||||
" 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
|
||||
done
|
||||
wait
|
||||
|
||||
# Create per-query flamegraphs and files with metrics
|
||||
IFS=$'\n'
|
||||
for version in {right,left}
|
||||
do
|
||||
for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq)
|
||||
for query in $(cut -d' ' -f1,2,3 "stacks.$version.rep" | sort | uniq)
|
||||
do
|
||||
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
|
||||
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
|
||||
|
||||
# Build separate .svg flamegraph for each query.
|
||||
grep -F "$query " "stacks.$version.rep" \
|
||||
@ -542,7 +648,7 @@ case "$stage" in
|
||||
# to collect the logs. Prefer not to restart, because addresses might change
|
||||
# and we won't be able to process trace_log data. Start in a subshell, so that
|
||||
# it doesn't interfere with the watchdog through `wait`.
|
||||
( time get_profiles || restart || get_profiles ||: )
|
||||
( get_profiles || restart || get_profiles ||: )
|
||||
|
||||
# Kill the whole process group, because somehow when the subshell is killed,
|
||||
# the sleep inside remains alive and orphaned.
|
||||
|
@ -119,5 +119,5 @@ done
|
||||
|
||||
dmesg -T > dmesg.log
|
||||
|
||||
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report
|
||||
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
|
||||
cp compare.log /output
|
||||
|
@ -1,32 +1,37 @@
|
||||
-- input is table(query text, run UInt32, version int, time float)
|
||||
-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
|
||||
select
|
||||
floor(original_medians_array.time_by_version[1], 4) l,
|
||||
floor(original_medians_array.time_by_version[2], 4) r,
|
||||
floor((r - l) / l, 3) diff_percent,
|
||||
floor(threshold / l, 3) threshold_percent,
|
||||
query
|
||||
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
|
||||
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
|
||||
arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent,
|
||||
arrayMap(x, y -> floor(x / y, 3), threshold, l) threshold_percent,
|
||||
test, query
|
||||
from
|
||||
(
|
||||
-- quantiles of randomization distributions
|
||||
select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
|
||||
select quantileExactForEach(0.999)(
|
||||
arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
|
||||
) threshold
|
||||
---- uncomment to see what the distribution is really like
|
||||
--, uniqExact(d) u
|
||||
--, uniqExact(d.1) u
|
||||
--, arraySort(x->x.1,
|
||||
-- arrayZip(
|
||||
-- (sumMap([d], [1]) as f).1,
|
||||
-- (sumMap([d.1], [1]) as f).1,
|
||||
-- f.2)) full_histogram
|
||||
from
|
||||
(
|
||||
select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
|
||||
-- make array 'random label' -> '[median metric]'
|
||||
select virtual_run, groupArrayInsertAt(median_metrics, random_label) metrics_by_label
|
||||
from (
|
||||
select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
|
||||
-- get [median metric] arrays among virtual runs, grouping by random label
|
||||
select medianExactForEach(metrics) median_metrics, virtual_run, random_label
|
||||
from (
|
||||
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements
|
||||
-- randomly relabel measurements
|
||||
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label
|
||||
from (
|
||||
select time, number virtual_run
|
||||
select metrics, number virtual_run
|
||||
from
|
||||
-- strip the query away before the join -- it might be several kB long;
|
||||
(select time, run, version from table) no_query,
|
||||
(select metrics, run, version from table) no_query,
|
||||
-- duplicate input measurements into many virtual runs
|
||||
numbers(1, 100000) nn
|
||||
-- for each virtual run, randomly reorder measurements
|
||||
@ -40,19 +45,19 @@ from
|
||||
-- this select aggregates by virtual_run
|
||||
) rd,
|
||||
(
|
||||
select groupArrayInsertAt(median_time, version) time_by_version
|
||||
select groupArrayInsertAt(median_metrics, version) medians_by_version
|
||||
from
|
||||
(
|
||||
select medianExact(time) median_time, version
|
||||
select medianExactForEach(metrics) median_metrics, version
|
||||
from table
|
||||
group by version
|
||||
) original_medians
|
||||
) original_medians_array,
|
||||
(
|
||||
select any(query) query from table
|
||||
select any(test) test, any(query) query from table
|
||||
) any_query,
|
||||
(
|
||||
select throwIf(uniq(query) != 1) from table
|
||||
select throwIf(uniq((test, query)) != 1) from table
|
||||
) check_single_query -- this subselect checks that there is only one query in the input table;
|
||||
-- written this way so that it is not optimized away (#10523)
|
||||
;
|
||||
|
@ -29,6 +29,8 @@ parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS',
|
||||
parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
|
||||
args = parser.parse_args()
|
||||
|
||||
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
|
||||
|
||||
tree = et.parse(args.file[0])
|
||||
root = tree.getroot()
|
||||
|
||||
@ -141,19 +143,25 @@ test_queries = substitute_parameters(test_query_templates)
|
||||
|
||||
report_stage_end('substitute2')
|
||||
|
||||
for i, q in enumerate(test_queries):
|
||||
for query_index, q in enumerate(test_queries):
|
||||
query_prefix = f'{test_name}.query{query_index}'
|
||||
|
||||
# We have some crazy long queries (about 100kB), so trim them to a sane
|
||||
# length.
|
||||
# length. This means we can't use query text as an identifier and have to
|
||||
# use the test name + the test-wide query index.
|
||||
query_display_name = q
|
||||
if len(query_display_name) > 1000:
|
||||
query_display_name = f'{query_display_name[:1000]}...({i})'
|
||||
|
||||
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
|
||||
|
||||
# Prewarm: run once on both servers. Helps to bring the data into memory,
|
||||
# precompile the queries, etc.
|
||||
try:
|
||||
for conn_index, c in enumerate(connections):
|
||||
res = c.execute(q, query_id = f'prewarm {0} {query_display_name}')
|
||||
print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
prewarm_id = f'{query_prefix}.prewarm0'
|
||||
res = c.execute(q, query_id = prewarm_id)
|
||||
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
except:
|
||||
# If prewarm fails for some query -- skip it, and try to test the others.
|
||||
# This might happen if the new test introduces some function that the
|
||||
@ -170,13 +178,14 @@ for i, q in enumerate(test_queries):
|
||||
start_seconds = time.perf_counter()
|
||||
server_seconds = 0
|
||||
for run in range(0, args.runs):
|
||||
run_id = f'{query_prefix}.run{run}'
|
||||
for conn_index, c in enumerate(connections):
|
||||
res = c.execute(q)
|
||||
print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
res = c.execute(q, query_id = run_id)
|
||||
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
server_seconds += c.last_query.elapsed
|
||||
|
||||
client_seconds = time.perf_counter() - start_seconds
|
||||
print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}')
|
||||
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
|
||||
|
||||
report_stage_end('benchmark')
|
||||
|
||||
|
@ -25,6 +25,9 @@ very_unstable_queries = 0
|
||||
# max seconds to run one query by itself, not counting preparation
|
||||
allowed_single_run_time = 2
|
||||
|
||||
color_bad='#ffb0c0'
|
||||
color_good='#b0d050'
|
||||
|
||||
header_template = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
@ -188,8 +191,8 @@ if args.report == 'main':
|
||||
|
||||
print(tableStart('Changes in performance'))
|
||||
columns = [
|
||||
'Old, s.', # 0
|
||||
'New, s.', # 1
|
||||
'Old, s', # 0
|
||||
'New, s', # 1
|
||||
'Relative difference (new − old) / old', # 2
|
||||
'p < 0.001 threshold', # 3
|
||||
# Failed # 4
|
||||
@ -205,10 +208,10 @@ if args.report == 'main':
|
||||
if int(row[4]):
|
||||
if float(row[2]) < 0.:
|
||||
faster_queries += 1
|
||||
attrs[2] = 'style="background: #00ff00"'
|
||||
attrs[2] = f'style="background: {color_good}"'
|
||||
else:
|
||||
slower_queries += 1
|
||||
attrs[2] = 'style="background: #ff0000"'
|
||||
attrs[2] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[2] = ''
|
||||
|
||||
@ -221,7 +224,7 @@ if args.report == 'main':
|
||||
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
|
||||
error_tests += len(slow_on_client_rows)
|
||||
printSimpleTable('Slow on client',
|
||||
['Client time, s.', 'Server time, s.', 'Ratio', 'Query'],
|
||||
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
|
||||
slow_on_client_rows)
|
||||
|
||||
def print_unstable_queries():
|
||||
@ -252,7 +255,7 @@ if args.report == 'main':
|
||||
for r in unstable_rows:
|
||||
if int(r[4]):
|
||||
very_unstable_queries += 1
|
||||
attrs[3] = 'style="background: #ffb0a0"'
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[3] = ''
|
||||
|
||||
@ -266,7 +269,7 @@ if args.report == 'main':
|
||||
error_tests += len(run_error_rows)
|
||||
printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
|
||||
|
||||
skipped_tests_rows = tsvRows('skipped-tests.tsv')
|
||||
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
|
||||
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
|
||||
|
||||
printSimpleTable('Tests with most unstable queries',
|
||||
@ -281,13 +284,13 @@ if args.report == 'main':
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Wall clock time, s.', #1
|
||||
'Total client time, s.', #2
|
||||
'Wall clock time, s', #1
|
||||
'Total client time, s', #2
|
||||
'Total queries', #3
|
||||
'Ignored short queries', #4
|
||||
'Longest query<br>(sum for all runs), s.', #5
|
||||
'Avg wall clock time<br>(sum for all runs), s.', #6
|
||||
'Shortest query<br>(sum for all runs), s.', #7
|
||||
'Longest query<br>(sum for all runs), s', #5
|
||||
'Avg wall clock time<br>(sum for all runs), s', #6
|
||||
'Shortest query<br>(sum for all runs), s', #7
|
||||
]
|
||||
|
||||
print(tableStart('Test times'))
|
||||
@ -300,13 +303,13 @@ if args.report == 'main':
|
||||
if float(r[6]) > 1.5 * total_runs:
|
||||
# FIXME should be 15s max -- investigate parallel_insert
|
||||
slow_average_tests += 1
|
||||
attrs[6] = 'style="background: #ffb0a0"'
|
||||
attrs[6] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[6] = ''
|
||||
|
||||
if float(r[5]) > allowed_single_run_time * total_runs:
|
||||
slow_average_tests += 1
|
||||
attrs[5] = 'style="background: #ffb0a0"'
|
||||
attrs[5] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[5] = ''
|
||||
|
||||
@ -320,9 +323,9 @@ if args.report == 'main':
|
||||
|
||||
print("""
|
||||
<p class="links">
|
||||
<a href="output.7z">Test output</a>
|
||||
<a href="all-queries.html">All queries</a>
|
||||
<a href="compare.log">Log</a>
|
||||
<a href="output.7z">Test output</a>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@ -382,8 +385,8 @@ elif args.report == 'all-queries':
|
||||
columns = [
|
||||
# Changed #0
|
||||
# Unstable #1
|
||||
'Old, s.', #2
|
||||
'New, s.', #3
|
||||
'Old, s', #2
|
||||
'New, s', #3
|
||||
'Relative difference (new − old) / old', #4
|
||||
'Times speedup / slowdown', #5
|
||||
'p < 0.001 threshold', #6
|
||||
@ -399,21 +402,21 @@ elif args.report == 'all-queries':
|
||||
attrs[1] = None
|
||||
for r in rows:
|
||||
if int(r[1]):
|
||||
attrs[6] = 'style="background: #ffb0a0"'
|
||||
attrs[6] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[6] = ''
|
||||
|
||||
if int(r[0]):
|
||||
if float(r[4]) > 0.:
|
||||
attrs[4] = 'style="background: #ffb0a0"'
|
||||
attrs[4] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[4] = 'style="background: #adbdff"'
|
||||
attrs[4] = f'style="background: {color_good}"'
|
||||
else:
|
||||
attrs[4] = ''
|
||||
|
||||
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
|
||||
attrs[2] = 'style="background: #ffb0a0"'
|
||||
attrs[3] = 'style="background: #ffb0a0"'
|
||||
attrs[2] = f'style="background: {color_bad}"'
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[2] = ''
|
||||
attrs[3] = ''
|
||||
@ -428,9 +431,9 @@ elif args.report == 'all-queries':
|
||||
|
||||
print("""
|
||||
<p class="links">
|
||||
<a href="output.7z">Test output</a>
|
||||
<a href="report.html">Main report</a>
|
||||
<a href="compare.log">Log</a>
|
||||
<a href="output.7z">Test output</a>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,10 +1,10 @@
|
||||
## system.table\_name {#system-tables_table-name}
|
||||
## system.table_name {#system-tables_table-name}
|
||||
|
||||
Description.
|
||||
|
||||
Columns:
|
||||
|
||||
- `column_name` ([data\_type\_name](path/to/data_type.md)) — Description.
|
||||
- `column_name` ([data_type_name](path/to/data_type.md)) — Description.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -5,7 +5,7 @@ toc_title: How to Build ClickHouse on Mac OS X
|
||||
|
||||
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
|
||||
|
||||
Build should work on Mac OS X 10.15 (Catalina)
|
||||
Build should work on Mac OS X 10.15 (Catalina).
|
||||
|
||||
## Install Homebrew {#install-homebrew}
|
||||
|
||||
|
@ -7,7 +7,7 @@ Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
|
||||
|
||||
# If You Use Windows {#if-you-use-windows}
|
||||
|
||||
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
|
||||
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
|
||||
|
||||
# If You Use a 32-bit System {#if-you-use-a-32-bit-system}
|
||||
|
||||
|
@ -3,4 +3,4 @@ toc_folder_title: Engines
|
||||
toc_priority: 25
|
||||
---
|
||||
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}
|
||||
|
@ -4,3 +4,4 @@ toc_priority: 76
|
||||
---
|
||||
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}
|
||||
|
@ -50,7 +50,7 @@ sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
|
||||
```
|
||||
|
||||
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). The `prestable` tag is sometimes available too.
|
||||
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
|
||||
|
||||
Then run these commands to install packages:
|
||||
|
||||
|
@ -8,27 +8,27 @@ toc_title: Playground
|
||||
[ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with.
|
||||
|
||||
ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
|
||||
ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
|
||||
|
||||
You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
|
||||
## Credentials
|
||||
|
||||
| Parameter | Value |
|
||||
|:------------------|:----------------------------------------|
|
||||
| HTTPS endpoint | `https://play-api.clickhouse.tech:8443` |
|
||||
| Native endpoint | `play-api.clickhouse.tech:9440` |
|
||||
| User | `playground` |
|
||||
| Password | `clickhouse` |
|
||||
|
||||
!!! note "Note"
|
||||
Note that all endpoints require a secure TLS connection.
|
||||
| Parameter | Value |
|
||||
|:--------------------|:----------------------------------------|
|
||||
| HTTPS endpoint | `https://play-api.clickhouse.tech:8443` |
|
||||
| Native TCP endpoint | `play-api.clickhouse.tech:9440` |
|
||||
| User | `playground` |
|
||||
| Password | `clickhouse` |
|
||||
|
||||
There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):
|
||||
|
||||
* 20.3 LTS: `play-api-v20-3.clickhouse.tech`
|
||||
* 19.14 LTS: `play-api-v19-14.clickhouse.tech`
|
||||
|
||||
!!! note "Note"
|
||||
All these endpoints require a secure TLS connection.
|
||||
|
||||
## Limitations
|
||||
|
||||
The queries are executed as a read-only user. It implies some limitations:
|
||||
@ -50,7 +50,7 @@ HTTPS endpoint example with `curl`:
|
||||
curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&user=playground&password=clickhouse&database=datasets"
|
||||
```
|
||||
|
||||
TCP endpoint example with [../interfaces/cli.md]:
|
||||
TCP endpoint example with [CLI](../interfaces/cli.md):
|
||||
``` bash
|
||||
clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'"
|
||||
```
|
||||
|
@ -11,7 +11,7 @@ toc_title: Adopters
|
||||
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|
||||
|---------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [2gis](https://2gis.ru){.favicon} | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
|
||||
| [Aloha Browser](https://alohabrowser.com/){.favicon} | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) |
|
||||
| [Aloha Browser](https://alohabrowser.com/){.favicon} | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
|
||||
| [Amadeus](https://amadeus.com/){.favicon} | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
|
||||
| [Appsflyer](https://www.appsflyer.com){.favicon} | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
|
||||
| [ArenaData](https://arenadata.tech/){.favicon} | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
|
||||
|
@ -27,4 +27,4 @@ Under the same conditions, ClickHouse can handle several hundred queries per sec
|
||||
|
||||
We recommend inserting data in packets of at least 1000 rows, or no more than a single request per second. When inserting to a MergeTree table from a tab-separated dump, the insertion speed can be from 50 to 200 MB/s. If the inserted rows are around 1 Kb in size, the speed will be from 50,000 to 200,000 rows per second. If the rows are small, the performance can be higher in rows per second (on Banner System data -`>` 500,000 rows per second; on Graphite data -`>` 1,000,000 rows per second). To improve performance, you can make multiple INSERT queries in parallel, which scales linearly.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/introduction/performance/) <!--hide-->
|
||||
{## [Original article](https://clickhouse.tech/docs/en/introduction/performance/) ##}
|
||||
|
@ -733,7 +733,7 @@ Example
|
||||
<mysql_port>9004</mysql_port>
|
||||
```
|
||||
|
||||
## tmp\_path {#server-settings-tmp_path}
|
||||
## tmp_path {#tmp-path}
|
||||
|
||||
Path to temporary data for processing large queries.
|
||||
|
||||
@ -746,16 +746,17 @@ Path to temporary data for processing large queries.
|
||||
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
|
||||
```
|
||||
|
||||
## tmp\_policy {#server-settings-tmp-policy}
|
||||
## tmp_policy {#tmp-policy}
|
||||
|
||||
Policy from [`storage_configuration`](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
|
||||
If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is ignored.
|
||||
Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
|
||||
|
||||
If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
|
||||
|
||||
!!! note "Note"
|
||||
- `move_factor` is ignored
|
||||
- `keep_free_space_bytes` is ignored
|
||||
- `max_data_part_size_bytes` is ignored
|
||||
- you must have exactly one volume in that policy
|
||||
- `move_factor` is ignored.
|
||||
- `keep_free_space_bytes` is ignored.
|
||||
- `max_data_part_size_bytes` is ignored.
|
||||
- Уou must have exactly one volume in that policy.
|
||||
|
||||
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size}
|
||||
|
||||
|
@ -1026,27 +1026,32 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## optimize\_skip\_unused\_shards {#settings-optimize_skip_unused_shards}
|
||||
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
|
||||
|
||||
Enables or disables skipping of unused shards for SELECT queries that have sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by sharding key, otherwise do nothing).
|
||||
|
||||
Default value: 0
|
||||
|
||||
## force\_optimize\_skip\_unused\_shards {#settings-force_optimize_skip_unused_shards}
|
||||
|
||||
Enables or disables query execution if [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled exception will be thrown.
|
||||
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by sharding key, otherwise does nothing).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled (do not throws)
|
||||
- 1 - Disable query execution only if the table has sharding key
|
||||
- 2 - Disable query execution regardless sharding key is defined for the table
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: 0
|
||||
|
||||
## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards}
|
||||
|
||||
Enables or disables query execution if [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disabled. ClickHouse doesn't throw an exception.
|
||||
- 1 — Enabled. Query execution is disabled only if the table has a sharding key.
|
||||
- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table.
|
||||
|
||||
Default value: 0
|
||||
|
||||
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}
|
||||
|
||||
Reset [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) for nested `Distributed` table
|
||||
Reset [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) for nested `Distributed` table
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1250,7 +1255,9 @@ Default value: Empty
|
||||
|
||||
## background\_pool\_size {#background_pool_size}
|
||||
|
||||
Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied at ClickHouse server start and can’t be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
|
||||
Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and can’t be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
|
||||
|
||||
Before changing it, please also take a look at related [MergeTree settings](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree), such as `number_of_free_entries_in_pool_to_lower_max_size_of_merge` and `number_of_free_entries_in_pool_to_execute_mutation`.
|
||||
|
||||
Possible values:
|
||||
|
||||
|
@ -536,26 +536,26 @@ Contains logging entries. Logging level which goes to this table can be limited
|
||||
|
||||
Columns:
|
||||
|
||||
- `event_date` (`Date`) - Date of the entry.
|
||||
- `event_time` (`DateTime`) - Time of the entry.
|
||||
- `microseconds` (`UInt32`) - Microseconds of the entry.
|
||||
- `event_date` (Date) — Date of the entry.
|
||||
- `event_time` (DateTime) — Time of the entry.
|
||||
- `microseconds` (UInt32) — Microseconds of the entry.
|
||||
- `thread_name` (String) — Name of the thread from which the logging was done.
|
||||
- `thread_id` (UInt64) — OS thread ID.
|
||||
- `level` (`Enum8`) - Entry level.
|
||||
- `'Fatal' = 1`
|
||||
- `'Critical' = 2`
|
||||
- `'Error' = 3`
|
||||
- `'Warning' = 4`
|
||||
- `'Notice' = 5`
|
||||
- `'Information' = 6`
|
||||
- `'Debug' = 7`
|
||||
- `'Trace' = 8`
|
||||
- `query_id` (`String`) - ID of the query.
|
||||
- `logger_name` (`LowCardinality(String)`) - Name of the logger (i.e. `DDLWorker`)
|
||||
- `message` (`String`) - The message itself.
|
||||
- `revision` (`UInt32`) - ClickHouse revision.
|
||||
- `source_file` (`LowCardinality(String)`) - Source file from which the logging was done.
|
||||
- `source_line` (`UInt64`) - Source line from which the logging was done.
|
||||
- `level` (`Enum8`) — Entry level. Possible values:
|
||||
- `1` or `'Fatal'`.
|
||||
- `2` or `'Critical'`.
|
||||
- `3` or `'Error'`.
|
||||
- `4` or `'Warning'`.
|
||||
- `5` or `'Notice'`.
|
||||
- `6` or `'Information'`.
|
||||
- `7` or `'Debug'`.
|
||||
- `8` or `'Trace'`.
|
||||
- `query_id` (String) — ID of the query.
|
||||
- `logger_name` (LowCardinality(String)) — Name of the logger (i.e. `DDLWorker`).
|
||||
- `message` (String) — The message itself.
|
||||
- `revision` (UInt32) — ClickHouse revision.
|
||||
- `source_file` (LowCardinality(String)) — Source file from which the logging was done.
|
||||
- `source_line` (UInt64) — Source line from which the logging was done.
|
||||
|
||||
## system.query\_log {#system_tables-query_log}
|
||||
|
||||
|
@ -1543,20 +1543,32 @@ It represents an unbiased estimate of the variance of a random variable if passe
|
||||
|
||||
Returns `Float64`. When `n <= 1`, returns `+∞`.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## varPop(x) {#varpopx}
|
||||
|
||||
Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
|
||||
|
||||
In other words, dispersion for a set of values. Returns `Float64`.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## stddevSamp(x) {#stddevsampx}
|
||||
|
||||
The result is equal to the square root of `varSamp(x)`.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## stddevPop(x) {#stddevpopx}
|
||||
|
||||
The result is equal to the square root of `varPop(x)`.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## topK(N)(x) {#topknx}
|
||||
|
||||
Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves).
|
||||
@ -1641,14 +1653,23 @@ Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
|
||||
|
||||
Returns Float64. When `n <= 1`, returns +∞.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## covarPop(x, y) {#covarpopx-y}
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## corr(x, y) {#corrx-y}
|
||||
|
||||
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
|
||||
|
||||
!!! note "Note"
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
|
||||
|
||||
## categoricalInformationValue {#categoricalinformationvalue}
|
||||
|
||||
Calculates the value of `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` for each category.
|
||||
|
@ -53,16 +53,16 @@ An exception is thrown when dividing by zero or when dividing a minimal negative
|
||||
|
||||
Differs from ‘intDiv’ in that it returns zero when dividing by zero or when dividing a minimal negative number by minus one.
|
||||
|
||||
## modulo(a, b), a % b operator {#moduloa-b-a-b-operator}
|
||||
## modulo(a, b), a % b operator {#modulo}
|
||||
|
||||
Calculates the remainder after division.
|
||||
If arguments are floating-point numbers, they are pre-converted to integers by dropping the decimal portion.
|
||||
The remainder is taken in the same sense as in C++. Truncated division is used for negative numbers.
|
||||
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
|
||||
|
||||
## moduloOrZero(a, b) {#moduloorzeroa-b}
|
||||
## moduloOrZero(a, b) {#modulo-or-zero}
|
||||
|
||||
Differs from ‘modulo’ in that it returns zero when the divisor is zero.
|
||||
Differs from [modulo](#modulo) in that it returns zero when the divisor is zero.
|
||||
|
||||
## negate(a), -a operator {#negatea-a-operator}
|
||||
|
||||
|
@ -201,17 +201,17 @@ All changes on replicated tables are broadcasting to ZooKeeper so will be applie
|
||||
|
||||
The following operations with [partitions](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) are available:
|
||||
|
||||
- [DETACH PARTITION](#alter_detach-partition) – Moves a partition to the `detached` directory and forget it.
|
||||
- [DROP PARTITION](#alter_drop-partition) – Deletes a partition.
|
||||
- [ATTACH PART\|PARTITION](#alter_attach-partition) – Adds a part or partition from the `detached` directory to the table.
|
||||
- [ATTACH PARTITION FROM](#alter_attach-partition-from) – Copies the data partition from one table to another and adds.
|
||||
- [REPLACE PARTITION](#alter_replace-partition) - Copies the data partition from one table to another and replaces.
|
||||
- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition)(#alter_move_to_table-partition) - Move the data partition from one table to another.
|
||||
- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) - Resets the value of a specified column in a partition.
|
||||
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) - Resets the specified secondary index in a partition.
|
||||
- [FREEZE PARTITION](#alter_freeze-partition) – Creates a backup of a partition.
|
||||
- [FETCH PARTITION](#alter_fetch-partition) – Downloads a partition from another server.
|
||||
- [MOVE PARTITION\|PART](#alter_move-partition) – Move partition/data part to another disk or volume.
|
||||
- [DETACH PARTITION](#alter_detach-partition) — Moves a partition to the `detached` directory and forget it.
|
||||
- [DROP PARTITION](#alter_drop-partition) — Deletes a partition.
|
||||
- [ATTACH PART\|PARTITION](#alter_attach-partition) — Adds a part or partition from the `detached` directory to the table.
|
||||
- [ATTACH PARTITION FROM](#alter_attach-partition-from) — Copies the data partition from one table to another and adds.
|
||||
- [REPLACE PARTITION](#alter_replace-partition) — Copies the data partition from one table to another and replaces.
|
||||
- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) — Moves the data partition from one table to another.
|
||||
- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) — Resets the value of a specified column in a partition.
|
||||
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — Resets the specified secondary index in a partition.
|
||||
- [FREEZE PARTITION](#alter_freeze-partition) — Creates a backup of a partition.
|
||||
- [FETCH PARTITION](#alter_fetch-partition) — Downloads a partition from another server.
|
||||
- [MOVE PARTITION\|PART](#alter_move-partition) — Move partition/data part to another disk or volume.
|
||||
|
||||
<!-- -->
|
||||
|
||||
@ -307,13 +307,13 @@ For the query to run successfully, the following conditions must be met:
|
||||
ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
```
|
||||
|
||||
This query move the data partition from the `table_source` to `table_dest` with deleting the data from `table_source`.
|
||||
This query moves the data partition from the `table_source` to `table_dest` with deleting the data from `table_source`.
|
||||
|
||||
For the query to run successfully, the following conditions must be met:
|
||||
|
||||
- Both tables must have the same structure.
|
||||
- Both tables must have the same partition key.
|
||||
- Both tables must be the same engine family. (replicated or non-replicated)
|
||||
- Both tables must be the same engine family (replicated or non-replicated).
|
||||
- Both tables must have the same storage policy.
|
||||
|
||||
#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition}
|
||||
|
@ -28,9 +28,10 @@ There may be any number of space symbols between syntactical constructions (incl
|
||||
|
||||
## Comments {#comments}
|
||||
|
||||
ClickHouse supports either SQL-style and C-style comments.
|
||||
SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
|
||||
C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
|
||||
ClickHouse supports either SQL-style and C-style comments:
|
||||
|
||||
- SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
|
||||
- C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
|
||||
|
||||
## Keywords {#syntax-keywords}
|
||||
|
||||
|
@ -5,13 +5,12 @@ toc_title: Roadmap
|
||||
|
||||
# Roadmap {#roadmap}
|
||||
|
||||
## Q1 2020 {#q1-2020}
|
||||
|
||||
- Role-based access control
|
||||
|
||||
## Q2 2020 {#q2-2020}
|
||||
|
||||
- Integration with external authentication services
|
||||
|
||||
## Q3 2020 {#q3-2020}
|
||||
|
||||
- Resource pools for more precise distribution of cluster capacity between users
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##}
|
||||
|
@ -25,6 +25,7 @@ toc_title: Integrations
|
||||
- Message queues
|
||||
- [Kafka](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- Stream processing
|
||||
- [Flink](https://flink.apache.org)
|
||||
- [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -27,6 +27,7 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC"
|
||||
- صف پیام
|
||||
- [کافکا](https://kafka.apache.org)
|
||||
- [در حال بارگذاری](https://github.com/housepower/clickhouse_sinker) (استفاده [برو کارگیر](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- پردازش جریان
|
||||
- [لرزش](https://flink.apache.org)
|
||||
- [سینک فلینک-کلیک](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -27,6 +27,7 @@ toc_title: "Int\xE9gration"
|
||||
- Files d'attente de messages
|
||||
- [Kafka](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (utiliser [Allez client](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- Traitement de flux
|
||||
- [Flink](https://flink.apache.org)
|
||||
- [flink-clickhouse-évier](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -27,6 +27,7 @@ toc_title: "\u7D71\u5408"
|
||||
- メッセージキュ
|
||||
- [カフカ](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (用途 [Goクライアント](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- ストリーム処理
|
||||
- [フリンク](https://flink.apache.org)
|
||||
- [フリンク-クリックハウス-シンク](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -113,7 +113,7 @@ ClickHouse может слить куски данных таким образо
|
||||
|
||||
Если название вложенной таблицы заканчивается на `Map` и она содержит не менее двух столбцов, удовлетворяющих критериям:
|
||||
|
||||
- первый столбец - числовой `(*Int*, Date, DateTime)`, назовем его условно `key`,
|
||||
- первый столбец - числовой `(*Int*, Date, DateTime)` или строковый `(String, FixedString)`, назовем его условно `key`,
|
||||
- остальные столбцы - арифметические `(*Int*, Float32/64)`, условно `(values...)`,
|
||||
|
||||
то вложенная таблица воспринимается как отображение `key => (values...)` и при слиянии её строк выполняется слияние элементов двух множеств по `key` со сложением соответствующих `(values...)`.
|
||||
|
@ -38,7 +38,7 @@ sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64
|
||||
```
|
||||
|
||||
Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений).
|
||||
Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`.
|
||||
|
||||
Для, собственно, установки пакетов необходимо выполнить следующие команды:
|
||||
|
||||
|
@ -45,6 +45,7 @@
|
||||
- [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
|
||||
- Elixir
|
||||
- [clickhousex](https://github.com/appodeal/clickhousex/)
|
||||
- [pillar](https://github.com/sofakingworld/pillar)
|
||||
- Nim
|
||||
- [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
- Очереди сообщений
|
||||
- [Kafka](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (использует [Go client](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- Потоковая обработка
|
||||
- [Flink](https://flink.apache.org)
|
||||
- [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -686,7 +686,7 @@ TCP порт для защищённого обмена данными с кли
|
||||
<mysql_port>9004</mysql_port>
|
||||
```
|
||||
|
||||
## tmp\_path {#tmp-path}
|
||||
## tmp_path {#tmp-path}
|
||||
|
||||
Путь ко временным данным для обработки больших запросов.
|
||||
|
||||
@ -698,6 +698,17 @@ TCP порт для защищённого обмена данными с кли
|
||||
``` xml
|
||||
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
|
||||
```
|
||||
## tmp_policy {#tmp-policy}
|
||||
|
||||
Политика из [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) для хранения временных файлов.
|
||||
|
||||
Если политика не задана, используется [tmp_path](#tmp-path). В противном случае `tmp_path` игнорируется.
|
||||
|
||||
!!! note "Примечание"
|
||||
- `move_factor` игнорируется.
|
||||
- `keep_free_space_bytes` игнорируется.
|
||||
- `max_data_part_size_bytes` игнорируется.
|
||||
- В данной политике у вас должен быть ровно один том.
|
||||
|
||||
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size}
|
||||
|
||||
|
@ -1025,6 +1025,29 @@ ClickHouse генерирует исключение
|
||||
|
||||
Значение по умолчанию: 0.
|
||||
|
||||
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
|
||||
|
||||
Включает или отключает пропуск неиспользуемых шардов для запросов [SELECT](../../sql-reference/statements/select/index.md) , в которых условие ключа шардирования задано в секции `WHERE/PREWHERE`. Предполагается, что данные распределены с помощью ключа шардирования, в противном случае настройка ничего не делает.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — Выключена.
|
||||
- 1 — Включена.
|
||||
|
||||
Значение по умолчанию: 0
|
||||
|
||||
## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards}
|
||||
|
||||
Разрешает или запрещает выполнение запроса, если настройка [optimize_skip_unused_shards](#optimize-skip-unused-shards) включена, а пропуск неиспользуемых шардов невозможен. Если данная настройка включена и пропуск невозможен, ClickHouse генерирует исключение.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — Выключена. ClickHouse не генерирует исключение.
|
||||
- 1 — Включена. Выполнение запроса запрещается, только если у таблицы есть ключ шардирования.
|
||||
- 2 — Включена. Выполнение запроса запрещается, даже если для таблицы не определен ключ шардирования.
|
||||
|
||||
Значение по умолчанию: 0
|
||||
|
||||
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
|
||||
|
||||
Включает или отключает генерирование исключения в в случаях, когда запрос [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) не выполняет мёрж.
|
||||
|
@ -517,6 +517,33 @@ CurrentMetric_ReplicatedChecks: 0
|
||||
- `query` (String) – текст запроса. Для запросов `INSERT` не содержит встаявляемые данные.
|
||||
- `query_id` (String) – идентификатор запроса, если был задан.
|
||||
|
||||
## system.text\_log {#system-tables-text-log}
|
||||
|
||||
Содержит записи логов. Уровень логирования для таблицы может быть ограничен параметром сервера `text_log.level`.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `event_date` (Date) — Дата создания записи.
|
||||
- `event_time` (DateTime) — Время создания записи.
|
||||
- `microseconds` (UInt32) — Время создания записи в микросекундах.
|
||||
- `thread_name` (String) — Название потока, из которого была сделана запись.
|
||||
- `thread_id` (UInt64) — Идентификатор потока ОС.
|
||||
- `level` (Enum8) — Уровень логирования записи. Возможные значения:
|
||||
- `1` или `'Fatal'`.
|
||||
- `2` или `'Critical'`.
|
||||
- `3` или `'Error'`.
|
||||
- `4` или `'Warning'`.
|
||||
- `5` или `'Notice'`.
|
||||
- `6` или `'Information'`.
|
||||
- `7` или `'Debug'`.
|
||||
- `8` или `'Trace'`.
|
||||
- `query_id` (String) — Идентификатор запроса.
|
||||
- `logger_name` (LowCardinality(String)) — Название логгера (`DDLWorker`).
|
||||
- `message` (String) — Само тело записи.
|
||||
- `revision` (UInt32) — Ревизия ClickHouse.
|
||||
- `source_file` (LowCardinality(String)) — Исходный файл, из которого была сделана запись.
|
||||
- `source_line` (UInt64) — Исходная строка, из которой была сделана запись.
|
||||
|
||||
## system.query\_log {#system_tables-query_log}
|
||||
|
||||
Содержит информацию о выполнении запросов. Для каждого запроса вы можете увидеть время начала обработки, продолжительность обработки, сообщения об ошибках и другую информацию.
|
||||
|
@ -1533,20 +1533,33 @@ SELECT medianDeterministic(val, 1) FROM t
|
||||
|
||||
Возвращает `Float64`. В случае, когда `n <= 1`, возвращается `+∞`.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечиват меньшую вычислительную ошибку.
|
||||
|
||||
## varPop(x) {#varpopx}
|
||||
|
||||
Вычисляет величину `Σ((x - x̅)^2) / n`, где `n` - размер выборки, `x̅`- среднее значение `x`.
|
||||
|
||||
То есть, дисперсию для множества значений. Возвращает `Float64`.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
|
||||
|
||||
## stddevSamp(x) {#stddevsampx}
|
||||
|
||||
Результат равен квадратному корню от `varSamp(x)`.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
|
||||
|
||||
## stddevPop(x) {#stddevpopx}
|
||||
|
||||
Результат равен квадратному корню от `varPop(x)`.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
|
||||
|
||||
|
||||
## topK(N)(column) {#topkncolumn}
|
||||
|
||||
Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям).
|
||||
@ -1626,14 +1639,24 @@ SELECT topKWeighted(10)(number, number) FROM numbers(1000)
|
||||
|
||||
Возвращает Float64. В случае, когда `n <= 1`, возвращается +∞.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
|
||||
|
||||
## covarPop(x, y) {#covarpopx-y}
|
||||
|
||||
Вычисляет величину `Σ((x - x̅)(y - y̅)) / n`.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
|
||||
|
||||
|
||||
## corr(x, y) {#corrx-y}
|
||||
|
||||
Вычисляет коэффициент корреляции Пирсона: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
|
||||
|
||||
!!! note "Примечание"
|
||||
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
|
||||
|
||||
## simpleLinearRegression {#simplelinearregression}
|
||||
|
||||
Выполняет простую (одномерную) линейную регрессию.
|
||||
|
@ -48,13 +48,17 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0
|
||||
|
||||
Отличается от intDiv тем, что при делении на ноль или при делении минимального отрицательного числа на минус единицу, возвращается ноль.
|
||||
|
||||
## modulo(a, b), оператор a % b {#moduloa-b-operator-a-b}
|
||||
## modulo(a, b), оператор a % b {#modulo}
|
||||
|
||||
Вычисляет остаток от деления.
|
||||
Если аргументы - числа с плавающей запятой, то они предварительно преобразуются в целые числа, путём отбрасывания дробной части.
|
||||
Берётся остаток в том же смысле, как это делается в C++. По факту, для отрицательных чисел, используется truncated division.
|
||||
При делении на ноль или при делении минимального отрицательного числа на минус единицу, кидается исключение.
|
||||
|
||||
## moduloOrZero(a, b) {#modulo-or-zero}
|
||||
|
||||
В отличие от [modulo](#modulo), возвращает ноль при делении на ноль.
|
||||
|
||||
## negate(a), оператор -a {#negatea-operator-a}
|
||||
|
||||
Вычисляет число, обратное по знаку. Результат всегда имеет знаковый тип.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Функции для работы с внешними словарями {#ext_dict_functions}
|
||||
|
||||
Информацию о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../../sql-reference/functions/ext-dict-functions.md).
|
||||
Информацию о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
|
||||
|
||||
## dictGet {#dictget}
|
||||
|
||||
|
@ -204,17 +204,17 @@ ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
|
||||
|
||||
Для работы с [партициями](../../sql-reference/statements/alter.md) доступны следующие операции:
|
||||
|
||||
- [DETACH PARTITION](#alter_detach-partition) – перенести партицию в директорию `detached`;
|
||||
- [DROP PARTITION](#alter_drop-partition) – удалить партицию;
|
||||
- [ATTACH PARTITION\|PART](#alter_attach-partition) – добавить партицию/кусок в таблицу из директории `detached`;
|
||||
- [ATTACH PARTITION FROM](#alter_attach-partition-from) – скопировать партицию из другой таблицы;
|
||||
- [REPLACE PARTITION](#alter_replace-partition) – скопировать партицию из другой таблицы с заменой;
|
||||
- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) (\#alter\_move\_to\_table-partition) - переместить партицию в другую таблицу;
|
||||
- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) – удалить все значения в столбце для заданной партиции;
|
||||
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) - очистить построенные вторичные индексы для заданной партиции;
|
||||
- [FREEZE PARTITION](#alter_freeze-partition) – создать резервную копию партиции;
|
||||
- [FETCH PARTITION](#alter_fetch-partition) – скачать партицию с другого сервера;
|
||||
- [MOVE PARTITION\|PART](#alter_move-partition) – переместить партицию/кускок на другой диск или том.
|
||||
- [DETACH PARTITION](#alter_detach-partition) — перенести партицию в директорию `detached`;
|
||||
- [DROP PARTITION](#alter_drop-partition) — удалить партицию;
|
||||
- [ATTACH PARTITION\|PART](#alter_attach-partition) — добавить партицию/кусок в таблицу из директории `detached`;
|
||||
- [ATTACH PARTITION FROM](#alter_attach-partition-from) — скопировать партицию из другой таблицы;
|
||||
- [REPLACE PARTITION](#alter_replace-partition) — скопировать партицию из другой таблицы с заменой;
|
||||
- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) — переместить партицию в другую таблицу;
|
||||
- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) — удалить все значения в столбце для заданной партиции;
|
||||
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — очистить построенные вторичные индексы для заданной партиции;
|
||||
- [FREEZE PARTITION](#alter_freeze-partition) — создать резервную копию партиции;
|
||||
- [FETCH PARTITION](#alter_fetch-partition) — скачать партицию с другого сервера;
|
||||
- [MOVE PARTITION\|PART](#alter_move-partition) — переместить партицию/кускок на другой диск или том.
|
||||
|
||||
#### DETACH PARTITION {#alter_detach-partition}
|
||||
|
||||
@ -312,12 +312,14 @@ ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1
|
||||
ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
```
|
||||
|
||||
Перемещает партицию из таблицы `table_source` в таблицу `table_dest` (добавляет к существующим данным в `table_dest`), с удалением данных из таблицы `table_source`.
|
||||
Перемещает партицию из таблицы `table_source` в таблицу `table_dest` (добавляет к существующим данным в `table_dest`) с удалением данных из таблицы `table_source`.
|
||||
|
||||
Следует иметь в виду:
|
||||
|
||||
- Таблицы должны иметь одинаковую структуру.
|
||||
- Для таблиц должен быть задан одинаковый ключ партиционирования.
|
||||
- Движки таблиц должны быть одинакового семейства (реплицированные или нереплицированные).
|
||||
- Для таблиц должна быть задана одинаковая политика хранения.
|
||||
|
||||
#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition}
|
||||
|
||||
|
@ -174,7 +174,7 @@ Upd. Всё ещё ждём удаление старого кода, котор
|
||||
|
||||
### 2.3. Перенос столбцового ser/de из DataType в Column {#perenos-stolbtsovogo-serde-iz-datatype-v-column}
|
||||
|
||||
В очереди.
|
||||
В очереди. Антон Попов.
|
||||
|
||||
### 2.4. Перевод LowCardinality из DataType в Column. Добавление ColumnSparse {#perevod-lowcardinality-iz-datatype-v-column-dobavlenie-columnsparse}
|
||||
|
||||
@ -977,10 +977,10 @@ Q2.
|
||||
|
||||
[Виталий Баранов](https://github.com/vitlibar) и Денис Глазачев, Altinity. Требует 12.1.
|
||||
|
||||
### 12.6. Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
|
||||
### 12.6. + Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
|
||||
|
||||
[Виталий Баранов](https://github.com/vitlibar). Требует 12.1.
|
||||
Есть pull request. Q2.
|
||||
Есть pull request. Q2. Готово.
|
||||
|
||||
|
||||
## 13. Разделение ресурсов, multi-tenancy {#razdelenie-resursov-multi-tenancy}
|
||||
|
@ -119,6 +119,11 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
|
||||
def on_page_markdown(self, markdown, page, config, files):
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files)
|
||||
|
||||
if os.path.islink(page.file.abs_src_path):
|
||||
lang = config.data['theme']['language']
|
||||
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1)
|
||||
|
||||
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'):
|
||||
return markdown
|
||||
if self.skip_git_log:
|
||||
|
@ -44,7 +44,7 @@ then
|
||||
if [[ ! -z "${CLOUDFLARE_TOKEN}" ]]
|
||||
then
|
||||
sleep 1m
|
||||
git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>4) { url="https://clickhouse.tech/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE
|
||||
git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>8) { url="https://content.clickhouse.tech/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE
|
||||
for FILENAME in $(ls PURGE*)
|
||||
do
|
||||
POST_DATA=$(cat "${FILENAME}" | sed -n -e 'H;${x;s/\n/,/g;s/^,//;p;}' | awk '{print "{\"files\":["$0"]}";}')
|
||||
|
@ -1,7 +1,7 @@
|
||||
Babel==2.8.0
|
||||
backports-abc==0.5
|
||||
backports.functools-lru-cache==1.6.1
|
||||
beautifulsoup4==4.9.0
|
||||
beautifulsoup4==4.9.1
|
||||
certifi==2020.4.5.1
|
||||
chardet==3.0.4
|
||||
click==7.1.2
|
||||
@ -18,10 +18,10 @@ Markdown==3.2.1
|
||||
MarkupSafe==1.1.1
|
||||
mkdocs==1.1.2
|
||||
mkdocs-htmlproofer-plugin==0.0.3
|
||||
mkdocs-macros-plugin==0.4.7
|
||||
mkdocs-macros-plugin==0.4.9
|
||||
nltk==3.5
|
||||
nose==1.3.7
|
||||
protobuf==3.11.3
|
||||
protobuf==3.12.1
|
||||
numpy==1.18.4
|
||||
Pygments==2.5.2
|
||||
pymdown-extensions==7.1
|
||||
@ -31,7 +31,7 @@ repackage==0.7.3
|
||||
requests==2.23.0
|
||||
singledispatch==3.4.0.3
|
||||
six==1.14.0
|
||||
soupsieve==2.0
|
||||
soupsieve==2.0.1
|
||||
termcolor==1.1.0
|
||||
tornado==5.1.1
|
||||
Unidecode==1.1.1
|
||||
|
@ -1,9 +1,11 @@
|
||||
import concurrent.futures
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import bs4
|
||||
import closure
|
||||
@ -20,25 +22,31 @@ def adjust_markdown_html(content):
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
for a in soup.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
if a_class and 'headerlink' in a_class:
|
||||
a.string = '\xa0'
|
||||
for details in soup.find_all('details'):
|
||||
for summary in details.find_all('summary'):
|
||||
if summary.parent != details:
|
||||
summary.extract()
|
||||
details.insert(0, summary)
|
||||
for div in soup.find_all('div'):
|
||||
div.attrs['role'] = 'alert'
|
||||
div_class = div.attrs.get('class')
|
||||
for a in div.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
if a_class:
|
||||
a.attrs['class'] = a_class + ['alert-link']
|
||||
else:
|
||||
a.attrs['class'] = 'alert-link'
|
||||
is_admonition = div_class and 'admonition' in div.attrs.get('class')
|
||||
if is_admonition:
|
||||
for a in div.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
if a_class:
|
||||
a.attrs['class'] = a_class + ['alert-link']
|
||||
else:
|
||||
a.attrs['class'] = 'alert-link'
|
||||
for p in div.find_all('p'):
|
||||
p_class = p.attrs.get('class')
|
||||
if p_class and ('admonition-title' in p_class):
|
||||
p.attrs['class'] = p_class + ['alert-heading', 'display-5', 'mb-2']
|
||||
if div_class and 'admonition' in div.attrs.get('class'):
|
||||
if is_admonition and p_class and ('admonition-title' in p_class):
|
||||
p.attrs['class'] = p_class + ['alert-heading', 'display-6', 'mb-2']
|
||||
if is_admonition:
|
||||
div.attrs['role'] = 'alert'
|
||||
if ('info' in div_class) or ('note' in div_class):
|
||||
mode = 'alert-primary'
|
||||
elif ('attention' in div_class) or ('warning' in div_class):
|
||||
@ -49,7 +57,7 @@ def adjust_markdown_html(content):
|
||||
mode = 'alert-info'
|
||||
else:
|
||||
mode = 'alert-secondary'
|
||||
div.attrs['class'] = div_class + ['alert', 'lead', 'pb-0', 'mb-4', mode]
|
||||
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode]
|
||||
|
||||
return str(soup)
|
||||
|
||||
@ -138,6 +146,7 @@ def get_js_in(args):
|
||||
f"'{args.website_dir}/js/jquery.js'",
|
||||
f"'{args.website_dir}/js/popper.js'",
|
||||
f"'{args.website_dir}/js/bootstrap.js'",
|
||||
f"'{args.website_dir}/js/sentry.js'",
|
||||
f"'{args.website_dir}/js/base.js'",
|
||||
f"'{args.website_dir}/js/index.js'",
|
||||
f"'{args.website_dir}/js/docsearch.js'",
|
||||
@ -145,6 +154,28 @@ def get_js_in(args):
|
||||
]
|
||||
|
||||
|
||||
def minify_file(path, css_digest, js_digest):
|
||||
if not (
|
||||
path.endswith('.html') or
|
||||
path.endswith('.css')
|
||||
):
|
||||
return
|
||||
|
||||
logging.info('Minifying %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
if path.endswith('.html'):
|
||||
content = minify_html(content)
|
||||
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
|
||||
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
|
||||
elif path.endswith('.css'):
|
||||
content = cssmin.cssmin(content)
|
||||
elif path.endswith('.js'):
|
||||
content = jsmin.jsmin(content)
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
|
||||
|
||||
def minify_website(args):
|
||||
css_in = ' '.join(get_css_in(args))
|
||||
css_out = f'{args.output_dir}/css/base.css'
|
||||
@ -190,28 +221,17 @@ def minify_website(args):
|
||||
|
||||
if args.minify:
|
||||
logging.info('Minifying website')
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
path = os.path.join(root, filename)
|
||||
if not (
|
||||
filename.endswith('.html') or
|
||||
filename.endswith('.css')
|
||||
):
|
||||
continue
|
||||
|
||||
logging.info('Minifying %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
if filename.endswith('.html'):
|
||||
content = minify_html(content)
|
||||
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
|
||||
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
|
||||
elif filename.endswith('.css'):
|
||||
content = cssmin.cssmin(content)
|
||||
elif filename.endswith('.js'):
|
||||
content = jsmin.jsmin(content)
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
path = os.path.join(root, filename)
|
||||
futures.append(executor.submit(minify_file, path, css_digest, js_digest))
|
||||
for future in futures:
|
||||
exc = future.exception()
|
||||
if exc:
|
||||
logging.error(exc)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def process_benchmark_results(args):
|
||||
|
@ -27,6 +27,7 @@ toc_title: Entegrasyonlar
|
||||
- Mesaj kuyrukları
|
||||
- [Kafka](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (kullanma [Go client](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- Akış işleme
|
||||
- [Flink](https://flink.apache.org)
|
||||
- [flink-clickhouse-lavabo](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -1,7 +1,5 @@
|
||||
---
|
||||
machine_translated: true
|
||||
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
|
||||
toc_folder_title: "\u53D1\u52A8\u673A"
|
||||
toc_folder_title: "\u5f15\u64ce"
|
||||
toc_priority: 25
|
||||
---
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
- 消息队列
|
||||
- [卡夫卡](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (使用 [去客户](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- 流处理
|
||||
- [Flink](https://flink.apache.org)
|
||||
- [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink)
|
||||
|
@ -61,13 +61,12 @@
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include "MySQLHandlerFactory.h"
|
||||
|
||||
#if USE_OPENCL
|
||||
#include "Common/BitonicSort.h"
|
||||
#endif
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
# include "Common/config_version.h"
|
||||
# include "config_core.h"
|
||||
# include "Common/config_version.h"
|
||||
# if USE_OPENCL
|
||||
# include "Common/BitonicSort.h" // Y_IGNORE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
@ -225,8 +224,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
registerDictionaries();
|
||||
registerDisks();
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#if USE_OPENCL
|
||||
BitonicSort::getInstance().configure();
|
||||
BitonicSort::getInstance().configure();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get());
|
||||
@ -379,7 +380,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
std::string tmp_path = config().getString("tmp_path", path + "tmp/");
|
||||
std::string tmp_policy = config().getString("tmp_policy", "");
|
||||
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy);
|
||||
for (const DiskPtr & disk : volume->disks)
|
||||
for (const DiskPtr & disk : volume->getDisks())
|
||||
setupTmpPath(log, disk->getPath());
|
||||
}
|
||||
|
||||
|
@ -405,6 +405,9 @@
|
||||
</prometheus>
|
||||
-->
|
||||
|
||||
<!-- Lazy system.*_log table creation -->
|
||||
<!-- <system_tables_lazy_load>false</system_tables_lazy_load> -->
|
||||
|
||||
<!-- Query log. Used only for queries with setting log_queries = 1. -->
|
||||
<query_log>
|
||||
<!-- What table to insert data. If table is not exist, it will be created.
|
||||
|
@ -21,7 +21,7 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
# if USE_OPENCL
|
||||
# include "Common/BitonicSort.h"
|
||||
# include "Common/BitonicSort.h" // Y_IGNORE
|
||||
# endif
|
||||
#else
|
||||
#undef USE_OPENCL
|
||||
@ -38,6 +38,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int OPENCL_ERROR;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -120,6 +121,30 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res,
|
||||
IColumn::SpecialSort special_sort) const
|
||||
{
|
||||
if (special_sort == IColumn::SpecialSort::OPENCL_BITONIC)
|
||||
{
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#if USE_OPENCL
|
||||
if (!limit || limit >= data.size())
|
||||
{
|
||||
res.resize(data.size());
|
||||
|
||||
if (data.empty() || BitonicSort::getInstance().sort(data, res, !reverse))
|
||||
return;
|
||||
}
|
||||
#else
|
||||
throw DB::Exception("'special_sort = bitonic' specified but OpenCL not available", DB::ErrorCodes::OPENCL_ERROR);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
getPermutation(reverse, limit, nan_direction_hint, res);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
|
||||
{
|
||||
@ -144,12 +169,6 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
|
||||
}
|
||||
else
|
||||
{
|
||||
#if USE_OPENCL
|
||||
/// If bitonic sort if specified as preferred than `nan_direction_hint` equals specific value 42.
|
||||
if (nan_direction_hint == 42 && BitonicSort::getInstance().sort(data, res, !reverse))
|
||||
return;
|
||||
#endif
|
||||
|
||||
/// A case for radix sort
|
||||
if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
|
||||
{
|
||||
|
@ -189,6 +189,8 @@ public:
|
||||
}
|
||||
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||
void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res,
|
||||
IColumn::SpecialSort) const override;
|
||||
|
||||
void reserve(size_t n) override
|
||||
{
|
||||
|
@ -245,6 +245,17 @@ public:
|
||||
*/
|
||||
virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0;
|
||||
|
||||
enum class SpecialSort
|
||||
{
|
||||
NONE = 0,
|
||||
OPENCL_BITONIC,
|
||||
};
|
||||
|
||||
virtual void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, SpecialSort) const
|
||||
{
|
||||
getPermutation(reverse, limit, nan_direction_hint, res);
|
||||
}
|
||||
|
||||
/** Copies each element according offsets parameter.
|
||||
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
||||
* It is necessary in ARRAY JOIN operation.
|
||||
@ -306,8 +317,9 @@ public:
|
||||
|
||||
static MutablePtr mutate(Ptr ptr)
|
||||
{
|
||||
MutablePtr res = ptr->shallowMutate();
|
||||
res->forEachSubcolumn([](WrappedPtr & subcolumn) { subcolumn = IColumn::mutate(std::move(subcolumn)); });
|
||||
MutablePtr res = ptr->shallowMutate(); /// Now use_count is 2.
|
||||
ptr.reset(); /// Reset use_count to 1.
|
||||
res->forEachSubcolumn([](WrappedPtr & subcolumn) { subcolumn = IColumn::mutate(std::move(subcolumn).detach()); });
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -11,25 +11,32 @@
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <type_traits>
|
||||
|
||||
#include <ext/bit_cast.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Core/Defines.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
|
||||
#include "oclBasics.cpp"
|
||||
#include "oclBasics.h"
|
||||
#include "bitonicSortKernels.cl"
|
||||
|
||||
class BitonicSort
|
||||
{
|
||||
public:
|
||||
using KernelType = OCL::KernelType;
|
||||
|
||||
enum Types
|
||||
{
|
||||
KernelInt8 = 0,
|
||||
KernelUInt8,
|
||||
KernelInt16,
|
||||
KernelUInt16,
|
||||
KernelInt32,
|
||||
KernelUInt32,
|
||||
KernelInt64,
|
||||
KernelUInt64,
|
||||
KernelMax
|
||||
};
|
||||
|
||||
static BitonicSort & getInstance()
|
||||
{
|
||||
@ -39,40 +46,50 @@ public:
|
||||
|
||||
/// Sorts given array in specified order. Returns `true` if given sequence was sorted, `false` otherwise.
|
||||
template <typename T>
|
||||
bool sort(const DB::PaddedPODArray<T> & data, DB::IColumn::Permutation & res, cl_uint sort_ascending)
|
||||
bool sort(const DB::PaddedPODArray<T> & data, DB::IColumn::Permutation & res, cl_uint sort_ascending [[maybe_unused]]) const
|
||||
{
|
||||
size_t s = data.size();
|
||||
|
||||
/// Getting the nearest power of 2.
|
||||
size_t power = 1;
|
||||
|
||||
if (s <= 8) power = 8;
|
||||
else while (power < s) power <<= 1;
|
||||
|
||||
/// Allocates more space for additional stubs to be added if needed.
|
||||
std::vector<T> pairs_content(power);
|
||||
std::vector<UInt32> pairs_indices(power);
|
||||
for (UInt32 i = 0; i < s; ++i)
|
||||
if constexpr (
|
||||
std::is_same_v<T, Int8> ||
|
||||
std::is_same_v<T, UInt8> ||
|
||||
std::is_same_v<T, Int16> ||
|
||||
std::is_same_v<T, UInt16> ||
|
||||
std::is_same_v<T, Int32> ||
|
||||
std::is_same_v<T, UInt32> ||
|
||||
std::is_same_v<T, Int64> ||
|
||||
std::is_same_v<T, UInt64>)
|
||||
{
|
||||
pairs_content[i] = data[i];
|
||||
pairs_indices[i] = i;
|
||||
}
|
||||
size_t data_size = data.size();
|
||||
|
||||
bool result = sort(pairs_content.data(), pairs_indices.data(), s, power - s, sort_ascending);
|
||||
/// Getting the nearest power of 2.
|
||||
size_t power = 8;
|
||||
while (power < data_size)
|
||||
power <<= 1;
|
||||
|
||||
if (!result) return false;
|
||||
/// Allocates more space for additional stubs to be added if needed.
|
||||
std::vector<T> pairs_content(power);
|
||||
std::vector<UInt32> pairs_indices(power);
|
||||
|
||||
for (size_t i = 0, shift = 0; i < power; ++i)
|
||||
{
|
||||
if (pairs_indices[i] >= s)
|
||||
memcpy(&pairs_content[0], &data[0], sizeof(T) * data_size);
|
||||
for (UInt32 i = 0; i < data_size; ++i)
|
||||
pairs_indices[i] = i;
|
||||
|
||||
fillWithStubs(pairs_content.data(), pairs_indices.data(), data_size, power - data_size, sort_ascending);
|
||||
sort(pairs_content.data(), pairs_indices.data(), power, sort_ascending);
|
||||
|
||||
for (size_t i = 0, shift = 0; i < power; ++i)
|
||||
{
|
||||
++shift;
|
||||
continue;
|
||||
if (pairs_indices[i] >= data_size)
|
||||
{
|
||||
++shift;
|
||||
continue;
|
||||
}
|
||||
res[i - shift] = pairs_indices[i];
|
||||
}
|
||||
res[i - shift] = pairs_indices[i];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Creating a configuration instance with making all OpenCl required variables
|
||||
@ -84,29 +101,36 @@ public:
|
||||
cl_platform_id platform = OCL::getPlatformID(settings);
|
||||
cl_device_id device = OCL::getDeviceID(platform, settings);
|
||||
cl_context gpu_context = OCL::makeContext(device, settings);
|
||||
cl_command_queue command_queue = OCL::makeCommandQueue(device, gpu_context, settings);
|
||||
cl_command_queue command_queue = OCL::makeCommandQueue<2>(device, gpu_context, settings);
|
||||
|
||||
cl_program program = OCL::makeProgram(bitonic_sort_kernels, gpu_context, device, settings);
|
||||
|
||||
/// Creating kernels for each specified data type.
|
||||
cl_int error = 0;
|
||||
kernels.resize(KernelMax);
|
||||
|
||||
kernels["char"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_char", &error),
|
||||
clReleaseKernel);
|
||||
kernels["uchar"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uchar", &error),
|
||||
clReleaseKernel);
|
||||
kernels["short"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_short", &error),
|
||||
clReleaseKernel);
|
||||
kernels["ushort"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ushort", &error),
|
||||
clReleaseKernel);
|
||||
kernels["int"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_int", &error),
|
||||
clReleaseKernel);
|
||||
kernels["uint"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uint", &error),
|
||||
clReleaseKernel);
|
||||
kernels["long"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_long", &error),
|
||||
clReleaseKernel);
|
||||
kernels["ulong"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ulong", &error),
|
||||
clReleaseKernel);
|
||||
kernels[KernelInt8] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_char", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt8] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uchar", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelInt16] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_short", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt16] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ushort", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelInt32] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_int", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt32] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uint", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelInt64] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_long", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt64] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ulong", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
configuration = std::shared_ptr<OCL::Configuration>(new OCL::Configuration(device, gpu_context, command_queue, program));
|
||||
@ -114,97 +138,24 @@ public:
|
||||
|
||||
private:
|
||||
/// Dictionary with kernels for each type from list: uchar, char, ushort, short, uint, int, ulong and long.
|
||||
std::map<std::string, std::shared_ptr<KernelType>> kernels;
|
||||
std::vector<std::shared_ptr<KernelType>> kernels;
|
||||
/// Current configuration with core OpenCL instances.
|
||||
std::shared_ptr<OCL::Configuration> configuration = nullptr;
|
||||
|
||||
/// Returns `true` if given sequence was sorted, `false` otherwise.
|
||||
template <typename T>
|
||||
bool sort(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
if (typeid(T).name() == typeid(cl_char).name())
|
||||
sort_char(reinterpret_cast<cl_char *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_uchar))
|
||||
sort_uchar(reinterpret_cast<cl_uchar *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_short))
|
||||
sort_short(reinterpret_cast<cl_short *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_ushort))
|
||||
sort_ushort(reinterpret_cast<cl_ushort *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_int))
|
||||
sort_int(reinterpret_cast<cl_int *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_uint))
|
||||
sort_uint(reinterpret_cast<cl_uint *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_long))
|
||||
sort_long(reinterpret_cast<cl_long *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else if (typeid(T) == typeid(cl_ulong))
|
||||
sort_ulong(reinterpret_cast<cl_ulong *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||
else
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Specific functions for each integer type.
|
||||
void sort_char(cl_char * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_char stubs_value = sort_ascending ? CHAR_MAX : CHAR_MIN;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["char"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_uchar(cl_uchar * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_uchar stubs_value = sort_ascending ? UCHAR_MAX : 0;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["uchar"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_short(cl_short * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_short stubs_value = sort_ascending ? SHRT_MAX : SHRT_MIN;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["short"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_ushort(cl_ushort * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_ushort stubs_value = sort_ascending ? USHRT_MAX : 0;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["ushort"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_int(cl_int * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_int stubs_value = sort_ascending ? INT_MAX : INT_MIN;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["int"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_uint(cl_uint * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_uint stubs_value = sort_ascending ? UINT_MAX : 0;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["uint"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_long(cl_long * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_long stubs_value = sort_ascending ? LONG_MAX : LONG_MIN;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["long"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
|
||||
void sort_ulong(cl_ulong * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||
{
|
||||
cl_ulong stubs_value = sort_ascending ? ULONG_MAX : 0;
|
||||
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||
sort(kernels["ulong"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||
}
|
||||
cl_kernel getKernel(Int8) const { return kernels[KernelInt8].get(); }
|
||||
cl_kernel getKernel(UInt8) const { return kernels[KernelUInt8].get(); }
|
||||
cl_kernel getKernel(Int16) const { return kernels[KernelInt16].get(); }
|
||||
cl_kernel getKernel(UInt16) const { return kernels[KernelUInt16].get(); }
|
||||
cl_kernel getKernel(Int32) const { return kernels[KernelInt32].get(); }
|
||||
cl_kernel getKernel(UInt32) const { return kernels[KernelUInt32].get(); }
|
||||
cl_kernel getKernel(Int64) const { return kernels[KernelInt64].get(); }
|
||||
cl_kernel getKernel(UInt64) const { return kernels[KernelUInt64].get(); }
|
||||
|
||||
/// Sorts p_input inplace with indices. Works only with arrays which size equals to power of two.
|
||||
template <class T>
|
||||
void sort(cl_kernel kernel, T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending)
|
||||
void sort(T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending) const
|
||||
{
|
||||
cl_kernel kernel = getKernel(T(0));
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int num_stages = 0;
|
||||
|
||||
@ -246,7 +197,7 @@ private:
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void configureKernel(cl_kernel kernel, int number_of_argument, void * source)
|
||||
void configureKernel(cl_kernel kernel, int number_of_argument, void * source) const
|
||||
{
|
||||
cl_int error = clSetKernelArg(kernel, number_of_argument, sizeof(T), source);
|
||||
OCL::checkError(error);
|
||||
@ -254,9 +205,9 @@ private:
|
||||
|
||||
/// Fills given sequences from `arraySize` index with `numberOfStubs` values.
|
||||
template <class T>
|
||||
void fillWithStubs(cl_int number_of_stubs, T value, T * p_input,
|
||||
cl_uint * indices, cl_int array_size)
|
||||
void fillWithStubs(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending) const
|
||||
{
|
||||
T value = sort_ascending ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
|
||||
for (cl_int index = 0; index < number_of_stubs; ++index)
|
||||
{
|
||||
p_input[array_size + index] = value;
|
||||
@ -264,7 +215,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
BitonicSort() {}
|
||||
BitonicSort(BitonicSort const &);
|
||||
void operator=(BitonicSort const &);
|
||||
BitonicSort() = default;
|
||||
BitonicSort(BitonicSort const &) = delete;
|
||||
void operator = (BitonicSort const &) = delete;
|
||||
};
|
||||
|
@ -217,6 +217,9 @@ protected:
|
||||
operator const immutable_ptr<T> & () const { return value; }
|
||||
operator immutable_ptr<T> & () { return value; }
|
||||
|
||||
/// Get internal immutable ptr. Does not change internal use counter.
|
||||
immutable_ptr<T> detach() && { return std::move(value); }
|
||||
|
||||
operator bool() const { return value != nullptr; }
|
||||
bool operator! () const { return value == nullptr; }
|
||||
|
||||
|
@ -496,6 +496,8 @@ namespace ErrorCodes
|
||||
extern const int OPENCL_ERROR = 522;
|
||||
extern const int UNKNOWN_ROW_POLICY = 523;
|
||||
extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524;
|
||||
extern const int INCORRECT_DISK_INDEX = 525;
|
||||
extern const int UNKNOWN_VOLUME_TYPE = 526;
|
||||
|
||||
extern const int KEEPER_EXCEPTION = 999;
|
||||
extern const int POCO_EXCEPTION = 1000;
|
||||
|
@ -151,16 +151,22 @@ public:
|
||||
LOG_TRACE(log, BridgeHelperMixin::serviceAlias() + " is not running, will try to start it");
|
||||
startBridge();
|
||||
bool started = false;
|
||||
for (size_t counter : ext::range(1, 20))
|
||||
|
||||
uint64_t milliseconds_to_wait = 10; /// Exponential backoff
|
||||
uint64_t counter = 0;
|
||||
while (milliseconds_to_wait < 10000)
|
||||
{
|
||||
++counter;
|
||||
LOG_TRACE(log, "Checking " + BridgeHelperMixin::serviceAlias() + " is running, try " << counter);
|
||||
if (checkBridgeIsRunning())
|
||||
{
|
||||
started = true;
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds_to_wait));
|
||||
milliseconds_to_wait *= 2;
|
||||
}
|
||||
|
||||
if (!started)
|
||||
throw Exception(BridgeHelperMixin::getName() + "BridgeHelper: " + BridgeHelperMixin::serviceAlias() + " is not responding",
|
||||
ErrorCodes::EXTERNAL_SERVER_IS_NOT_RESPONDING);
|
||||
|
@ -1,3 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_OPENCL
|
||||
|
||||
@ -15,24 +17,18 @@
|
||||
#include <Core/Types.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#ifndef CL_VERSION_2_0
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
|
||||
|
||||
using KernelType = std::remove_reference<decltype(*cl_kernel())>::type;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int OPENCL_ERROR;
|
||||
}
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int OPENCL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
struct OCL
|
||||
{
|
||||
using KernelType = std::remove_reference<decltype(*cl_kernel())>::type;
|
||||
|
||||
/**
|
||||
* Structure which represents the most essential settings of common OpenCl entities.
|
||||
@ -209,7 +205,7 @@ struct OCL
|
||||
static void checkError(cl_int error)
|
||||
{
|
||||
if (error != CL_SUCCESS)
|
||||
throw DB::Exception("OpenCL error " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR);
|
||||
throw DB::Exception("OpenCL error: " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
@ -221,22 +217,18 @@ struct OCL
|
||||
cl_int error = clGetPlatformIDs(settings.number_of_platform_entries, &platform,
|
||||
settings.number_of_available_platforms);
|
||||
checkError(error);
|
||||
|
||||
return platform;
|
||||
}
|
||||
|
||||
|
||||
static cl_device_id getDeviceID(cl_platform_id & platform, const Settings & settings)
|
||||
{
|
||||
cl_device_id device;
|
||||
cl_int error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, settings.number_of_devices_entries,
|
||||
&device, settings.number_of_available_devices);
|
||||
OCL::checkError(error);
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
|
||||
static cl_context makeContext(cl_device_id & device, const Settings & settings)
|
||||
{
|
||||
cl_int error;
|
||||
@ -244,32 +236,43 @@ struct OCL
|
||||
&device, settings.context_callback, settings.context_callback_data,
|
||||
&error);
|
||||
OCL::checkError(error);
|
||||
|
||||
return gpu_context;
|
||||
}
|
||||
|
||||
|
||||
template <int version>
|
||||
static cl_command_queue makeCommandQueue(cl_device_id & device, cl_context & context, const Settings & settings [[maybe_unused]])
|
||||
{
|
||||
cl_int error;
|
||||
#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
cl_command_queue command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error);
|
||||
#else
|
||||
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error);
|
||||
#endif
|
||||
OCL::checkError(error);
|
||||
cl_command_queue command_queue;
|
||||
|
||||
if constexpr (version == 1)
|
||||
{
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error);
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef CL_VERSION_2_0
|
||||
command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error);
|
||||
#else
|
||||
throw DB::Exception("Binary is built with OpenCL version < 2.0", DB::ErrorCodes::OPENCL_ERROR);
|
||||
#endif
|
||||
}
|
||||
|
||||
OCL::checkError(error);
|
||||
return command_queue;
|
||||
}
|
||||
|
||||
|
||||
static cl_program makeProgram(const char * source_code, cl_context context,
|
||||
cl_device_id device_id, const Settings & settings)
|
||||
{
|
||||
cl_int error = 0;
|
||||
size_t source_size = strlen(source_code);
|
||||
|
||||
cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers, &source_code, &source_size, &error);
|
||||
cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers,
|
||||
&source_code, &source_size, &error);
|
||||
checkError(error);
|
||||
|
||||
error = clBuildProgram(program, settings.number_of_devices_entries, &device_id, settings.build_options,
|
||||
@ -291,39 +294,30 @@ struct OCL
|
||||
}
|
||||
|
||||
checkError(error);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
/// Configuring buffer for given input data
|
||||
|
||||
template<typename K>
|
||||
static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context,
|
||||
cl_int elements_size = sizeof(K))
|
||||
static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context, cl_int elements_size = sizeof(K))
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_mem cl_input_buffer =
|
||||
clCreateBuffer
|
||||
(
|
||||
cl_mem cl_input_buffer = clCreateBuffer(
|
||||
context,
|
||||
CL_MEM_USE_HOST_PTR,
|
||||
zeroCopySizeAlignment(elements_size * array_size),
|
||||
p_input,
|
||||
&error
|
||||
);
|
||||
&error);
|
||||
checkError(error);
|
||||
|
||||
return cl_input_buffer;
|
||||
}
|
||||
|
||||
|
||||
static size_t zeroCopySizeAlignment(size_t required_size)
|
||||
{
|
||||
return required_size + (~required_size + 1) % 64;
|
||||
}
|
||||
|
||||
|
||||
/// Manipulating with common OpenCL variables.
|
||||
|
||||
static void finishCommandQueue(cl_command_queue command_queue)
|
||||
@ -333,10 +327,8 @@ struct OCL
|
||||
OCL::checkError(error);
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer,
|
||||
cl_command_queue command_queue, size_t offset = 0)
|
||||
static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer, cl_command_queue command_queue, size_t offset = 0)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
@ -357,7 +349,6 @@ struct OCL
|
||||
error = clReleaseMemObject(cl_buffer);
|
||||
checkError(error);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -37,7 +37,7 @@ target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
|
||||
|
||||
if (USE_OPENCL)
|
||||
add_executable (bitonic_sort bitonic_sort.cpp)
|
||||
target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io ${OPENCL_LINKER_FLAGS})
|
||||
target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io ${OPENCL_LINKER_FLAGS} ${OpenCL_LIBRARIES})
|
||||
endif ()
|
||||
|
||||
add_executable (arena_with_free_lists arena_with_free_lists.cpp)
|
||||
|
@ -1,8 +1,6 @@
|
||||
#include <Common/config.h>
|
||||
#include <iostream>
|
||||
|
||||
#if USE_OPENCL
|
||||
|
||||
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
@ -16,13 +14,10 @@
|
||||
#include "Common/BitonicSort.h"
|
||||
|
||||
|
||||
using Key = cl_ulong;
|
||||
|
||||
|
||||
/// Generates vector of size 8 for testing.
|
||||
/// Vector contains max possible value, min possible value and duplicate values.
|
||||
template <class Type>
|
||||
static void generateTest(std::vector<Type>& data, Type min_value, Type max_value)
|
||||
static void generateTest(std::vector<Type> & data, Type min_value, Type max_value)
|
||||
{
|
||||
int size = 10;
|
||||
|
||||
@ -62,8 +57,7 @@ static void check(const std::vector<size_t> & indices, bool reverse = true)
|
||||
|
||||
|
||||
template <class Type>
|
||||
static void sortBitonicSortWithPodArrays(const std::vector<Type>& data,
|
||||
std::vector<size_t> & indices, bool ascending = true)
|
||||
static void sortBitonicSortWithPodArrays(const std::vector<Type> & data, std::vector<size_t> & indices, bool ascending = true)
|
||||
{
|
||||
DB::PaddedPODArray<Type> pod_array_data = DB::PaddedPODArray<Type>(data.size());
|
||||
DB::IColumn::Permutation pod_array_indices = DB::IColumn::Permutation(data.size());
|
||||
@ -74,7 +68,6 @@ static void sortBitonicSortWithPodArrays(const std::vector<Type>& data,
|
||||
*(pod_array_indices.data() + index) = index;
|
||||
}
|
||||
|
||||
BitonicSort::getInstance().configure();
|
||||
BitonicSort::getInstance().sort(pod_array_data, pod_array_indices, ascending);
|
||||
|
||||
for (size_t index = 0; index < data.size(); ++index)
|
||||
@ -83,7 +76,7 @@ static void sortBitonicSortWithPodArrays(const std::vector<Type>& data,
|
||||
|
||||
|
||||
template <class Type>
|
||||
static void testBitonicSort(std::string test_name, Type min_value, Type max_value)
|
||||
static void testBitonicSort(const std::string & test_name, Type min_value, Type max_value)
|
||||
{
|
||||
std::cerr << test_name << std::endl;
|
||||
|
||||
@ -102,147 +95,80 @@ static void testBitonicSort(std::string test_name, Type min_value, Type max_valu
|
||||
|
||||
static void straightforwardTests()
|
||||
{
|
||||
testBitonicSort<cl_char>("Test 01: cl_char.", CHAR_MIN, CHAR_MAX);
|
||||
testBitonicSort<cl_uchar>("Test 02: cl_uchar.", 0, UCHAR_MAX);
|
||||
testBitonicSort<cl_short>("Test 03: cl_short.", SHRT_MIN, SHRT_MAX);
|
||||
testBitonicSort<cl_ushort>("Test 04: cl_ushort.", 0, USHRT_MAX);
|
||||
testBitonicSort<cl_int>("Test 05: cl_int.", INT_MIN, INT_MAX);
|
||||
testBitonicSort<cl_uint >("Test 06: cl_uint.", 0, UINT_MAX);
|
||||
testBitonicSort<cl_long >("Test 07: cl_long.", LONG_MIN, LONG_MAX);
|
||||
testBitonicSort<cl_ulong >("Test 08: cl_ulong.", 0, ULONG_MAX);
|
||||
testBitonicSort<DB::Int8>("Test 01: Int8.", CHAR_MIN, CHAR_MAX);
|
||||
testBitonicSort<DB::UInt8>("Test 02: UInt8.", 0, UCHAR_MAX);
|
||||
testBitonicSort<DB::Int16>("Test 03: Int16.", SHRT_MIN, SHRT_MAX);
|
||||
testBitonicSort<DB::UInt16>("Test 04: UInt16.", 0, USHRT_MAX);
|
||||
testBitonicSort<DB::Int32>("Test 05: Int32.", INT_MIN, INT_MAX);
|
||||
testBitonicSort<DB::UInt32>("Test 06: UInt32.", 0, UINT_MAX);
|
||||
testBitonicSort<DB::Int64>("Test 07: Int64.", LONG_MIN, LONG_MAX);
|
||||
testBitonicSort<DB::UInt64>("Test 08: UInt64.", 0, ULONG_MAX);
|
||||
}
|
||||
|
||||
|
||||
static void NO_INLINE sort1(Key * data, size_t size)
|
||||
template <typename T>
|
||||
static void bitonicSort(std::vector<T> & data)
|
||||
{
|
||||
std::sort(data, data + size);
|
||||
}
|
||||
|
||||
|
||||
static void NO_INLINE sort2(std::vector<Key> & data, std::vector<size_t> & indices)
|
||||
{
|
||||
BitonicSort::getInstance().configure();
|
||||
size_t size = data.size();
|
||||
std::vector<size_t> indices(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
indices[i] = i;
|
||||
|
||||
sortBitonicSortWithPodArrays(data, indices);
|
||||
|
||||
std::vector<Key> result(data.size());
|
||||
for (size_t index = 0; index < data.size(); ++index)
|
||||
result[index] = data[indices[index]];
|
||||
std::vector<T> result(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
result[i] = data[indices[i]];
|
||||
|
||||
data = std::move(result);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
template <typename T>
|
||||
static bool checkSort(const std::vector<T> & data, size_t size)
|
||||
{
|
||||
straightforwardTests();
|
||||
std::vector<T> copy1(data.begin(), data.begin() + size);
|
||||
std::vector<T> copy2(data.begin(), data.begin() + size);
|
||||
|
||||
if (argc < 3)
|
||||
{
|
||||
std::cerr << "Not enough arguments were passed\n";
|
||||
return 1;
|
||||
}
|
||||
std::sort(copy1.data(), copy1.data() + size);
|
||||
bitonicSort<T>(copy2);
|
||||
|
||||
size_t n = DB::parse<size_t>(argv[1]);
|
||||
size_t method = DB::parse<size_t>(argv[2]);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
if (copy1[i] != copy2[i])
|
||||
return false;
|
||||
|
||||
std::vector<Key> data(n);
|
||||
std::vector<size_t> indices(n);
|
||||
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
for (auto & elem : data)
|
||||
elem = static_cast<Key>(rand());
|
||||
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
indices[i] = i;
|
||||
|
||||
watch.stop();
|
||||
double elapsed = watch.elapsedSeconds();
|
||||
std::cerr
|
||||
<< "Filled in " << elapsed
|
||||
<< " (" << n / elapsed << " elem/sec., "
|
||||
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
if (n <= 100)
|
||||
{
|
||||
std::cerr << std::endl;
|
||||
for (const auto & elem : data)
|
||||
std::cerr << elem << ' ';
|
||||
std::cerr << std::endl;
|
||||
for (const auto & index : indices)
|
||||
std::cerr << index << ' ';
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
if (method == 1) sort1(data.data(), n);
|
||||
if (method == 2) sort2(data, indices);
|
||||
|
||||
watch.stop();
|
||||
double elapsed = watch.elapsedSeconds();
|
||||
std::cerr
|
||||
<< "Sorted in " << elapsed
|
||||
<< " (" << n / elapsed << " elem/sec., "
|
||||
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
size_t i = 1;
|
||||
while (i < n)
|
||||
{
|
||||
if (!(data[i - 1] <= data[i]))
|
||||
break;
|
||||
++i;
|
||||
}
|
||||
|
||||
watch.stop();
|
||||
double elapsed = watch.elapsedSeconds();
|
||||
std::cerr
|
||||
<< "Checked in " << elapsed
|
||||
<< " (" << n / elapsed << " elem/sec., "
|
||||
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
|
||||
<< std::endl
|
||||
<< "Result: " << (i == n ? "Ok." : "Fail!") << std::endl;
|
||||
}
|
||||
|
||||
if (n <= 1000)
|
||||
{
|
||||
std::cerr << std::endl;
|
||||
|
||||
std::cerr << data[0] << ' ';
|
||||
for (size_t i = 1; i < n; ++i)
|
||||
{
|
||||
if (!(data[i - 1] <= data[i]))
|
||||
std::cerr << "*** ";
|
||||
std::cerr << data[i] << ' ';
|
||||
}
|
||||
|
||||
std::cerr << std::endl;
|
||||
|
||||
for (const auto & index : indices)
|
||||
std::cerr << index << ' ';
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int main()
|
||||
{
|
||||
std::cerr << "Openc CL disabled.";
|
||||
BitonicSort::getInstance().configure();
|
||||
|
||||
straightforwardTests();
|
||||
|
||||
size_t size = 1100;
|
||||
std::vector<int> data(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
data[i] = rand();
|
||||
|
||||
for (size_t i = 0; i < 128; ++i)
|
||||
{
|
||||
if (!checkSort<int>(data, i))
|
||||
{
|
||||
std::cerr << "fail at length " << i << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 128; i < size; i += 7)
|
||||
{
|
||||
if (!checkSort<int>(data, i))
|
||||
{
|
||||
std::cerr << "fail at length " << i << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -52,6 +52,8 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingUInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
|
||||
M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
|
||||
M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
|
||||
M(SettingUInt64, min_insert_block_size_rows_for_materialized_views, 0, "Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows)", 0) \
|
||||
M(SettingUInt64, min_insert_block_size_bytes_for_materialized_views, 0, "Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes)", 0) \
|
||||
M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
|
||||
M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \
|
||||
M(SettingUInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
|
||||
|
@ -8,3 +8,4 @@
|
||||
#cmakedefine01 USE_EMBEDDED_COMPILER
|
||||
#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY
|
||||
#cmakedefine01 USE_SSL
|
||||
#cmakedefine01 USE_OPENCL
|
||||
|
@ -40,10 +40,20 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
|
||||
/// We need special context for materialized views insertions
|
||||
if (!dependencies.empty())
|
||||
{
|
||||
views_context = std::make_unique<Context>(context);
|
||||
select_context = std::make_unique<Context>(context);
|
||||
insert_context = std::make_unique<Context>(context);
|
||||
|
||||
const auto & insert_settings = insert_context->getSettingsRef();
|
||||
|
||||
// Do not deduplicate insertions into MV if the main insertion is Ok
|
||||
if (disable_deduplication_for_children)
|
||||
views_context->setSetting("insert_deduplicate", false);
|
||||
insert_context->setSetting("insert_deduplicate", false);
|
||||
|
||||
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
|
||||
if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed)
|
||||
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);
|
||||
if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed)
|
||||
insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value);
|
||||
}
|
||||
|
||||
for (const auto & database_table : dependencies)
|
||||
@ -67,7 +77,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
|
||||
insert->table_id = inner_table_id;
|
||||
|
||||
/// Get list of columns we get from select query.
|
||||
auto header = InterpreterSelectQuery(query, *views_context, SelectQueryOptions().analyze())
|
||||
auto header = InterpreterSelectQuery(query, *select_context, SelectQueryOptions().analyze())
|
||||
.getSampleBlock();
|
||||
|
||||
/// Insert only columns returned by select.
|
||||
@ -81,14 +91,14 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
|
||||
insert->columns = std::move(list);
|
||||
|
||||
ASTPtr insert_query_ptr(insert.release());
|
||||
InterpreterInsertQuery interpreter(insert_query_ptr, *views_context);
|
||||
InterpreterInsertQuery interpreter(insert_query_ptr, *insert_context);
|
||||
BlockIO io = interpreter.execute();
|
||||
out = io.out;
|
||||
}
|
||||
else if (dynamic_cast<const StorageLiveView *>(dependent_table.get()))
|
||||
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *views_context, ASTPtr(), true);
|
||||
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr(), true);
|
||||
else
|
||||
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *views_context, ASTPtr());
|
||||
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr());
|
||||
|
||||
views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr});
|
||||
}
|
||||
@ -258,7 +268,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n
|
||||
/// but it will contain single block (that is INSERT-ed into main table).
|
||||
/// InterpreterSelectQuery will do processing of alias columns.
|
||||
|
||||
Context local_context = *views_context;
|
||||
Context local_context = *select_context;
|
||||
local_context.addViewSource(
|
||||
StorageValues::create(
|
||||
storage->getStorageID(), storage->getColumns(), block, storage->getVirtuals()));
|
||||
|
@ -44,7 +44,8 @@ private:
|
||||
};
|
||||
|
||||
std::vector<ViewInfo> views;
|
||||
std::unique_ptr<Context> views_context;
|
||||
std::unique_ptr<Context> select_context;
|
||||
std::unique_ptr<Context> insert_context;
|
||||
|
||||
void process(const Block & block, size_t view_num);
|
||||
};
|
||||
|
@ -376,8 +376,10 @@ void registerDataTypeString(DataTypeFactory & factory)
|
||||
/// These synonyms are added for compatibility.
|
||||
|
||||
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive);
|
||||
factory.registerAlias("NCHAR", "String", DataTypeFactory::CaseInsensitive);
|
||||
factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive);
|
||||
factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive);
|
||||
factory.registerAlias("NVARCHAR", "String", DataTypeFactory::CaseInsensitive);
|
||||
factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle
|
||||
factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive);
|
||||
factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive);
|
||||
|
@ -12,11 +12,13 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
|
||||
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
|
||||
extern const int PATH_ACCESS_DENIED;
|
||||
extern const int INCORRECT_DISK_INDEX;
|
||||
}
|
||||
|
||||
std::mutex DiskLocal::reservation_mutex;
|
||||
@ -34,7 +36,9 @@ public:
|
||||
|
||||
UInt64 getSize() const override { return size; }
|
||||
|
||||
DiskPtr getDisk() const override { return disk; }
|
||||
DiskPtr getDisk(size_t i) const override;
|
||||
|
||||
Disks getDisks() const override { return {disk}; }
|
||||
|
||||
void update(UInt64 new_size) override;
|
||||
|
||||
@ -282,6 +286,15 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to
|
||||
IDisk::copy(from_path, to_disk, to_path); /// Copy files through buffers.
|
||||
}
|
||||
|
||||
DiskPtr DiskLocalReservation::getDisk(size_t i) const
|
||||
{
|
||||
if (i != 0)
|
||||
{
|
||||
throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX);
|
||||
}
|
||||
return disk;
|
||||
}
|
||||
|
||||
void DiskLocalReservation::update(UInt64 new_size)
|
||||
{
|
||||
std::lock_guard lock(DiskLocal::reservation_mutex);
|
||||
|
@ -55,7 +55,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig(
|
||||
|
||||
constexpr auto default_disk_name = "default";
|
||||
std::set<String> old_disks_minus_new_disks;
|
||||
for (const auto & [disk_name, _] : result->disks)
|
||||
for (const auto & [disk_name, _] : result->getDisksMap())
|
||||
{
|
||||
old_disks_minus_new_disks.insert(disk_name);
|
||||
}
|
||||
@ -65,10 +65,10 @@ DiskSelectorPtr DiskSelector::updateFromConfig(
|
||||
if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
|
||||
throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
|
||||
|
||||
if (result->disks.count(disk_name) == 0)
|
||||
if (result->getDisksMap().count(disk_name) == 0)
|
||||
{
|
||||
auto disk_config_prefix = config_prefix + "." + disk_name;
|
||||
result->disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context));
|
||||
result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -29,6 +29,10 @@ public:
|
||||
|
||||
/// Get all disks with names
|
||||
const auto & getDisksMap() const { return disks; }
|
||||
void addToDiskMap(String name, DiskPtr disk)
|
||||
{
|
||||
disks.emplace(name, disk);
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<String, DiskPtr> disks;
|
||||
|
@ -206,8 +206,11 @@ public:
|
||||
/// Get reservation size.
|
||||
virtual UInt64 getSize() const = 0;
|
||||
|
||||
/// Get disk where reservation take place.
|
||||
virtual DiskPtr getDisk() const = 0;
|
||||
/// Get i-th disk where reservation take place.
|
||||
virtual DiskPtr getDisk(size_t i = 0) const = 0;
|
||||
|
||||
/// Get all disks, used in reservation
|
||||
virtual Disks getDisks() const = 0;
|
||||
|
||||
/// Changes amount of reserved space.
|
||||
virtual void update(UInt64 new_size) = 0;
|
||||
|
@ -8,6 +8,17 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
enum class VolumeType
|
||||
{
|
||||
JBOD,
|
||||
SINGLE_DISK,
|
||||
UNKNOWN
|
||||
};
|
||||
|
||||
class IVolume;
|
||||
using VolumePtr = std::shared_ptr<IVolume>;
|
||||
using Volumes = std::vector<VolumePtr>;
|
||||
|
||||
/**
|
||||
* Disks group by some (user) criteria. For example,
|
||||
* - VolumeJBOD("slow_disks", [d1, d2], 100)
|
||||
@ -22,7 +33,7 @@ namespace DB
|
||||
class IVolume : public Space
|
||||
{
|
||||
public:
|
||||
IVolume(String name_, Disks disks_): disks(std::move(disks_)), name(std::move(name_))
|
||||
IVolume(String name_, Disks disks_): disks(std::move(disks_)), name(name_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -37,16 +48,17 @@ public:
|
||||
|
||||
/// Volume name from config
|
||||
const String & getName() const override { return name; }
|
||||
virtual VolumeType getType() const = 0;
|
||||
|
||||
/// Return biggest unreserved space across all disks
|
||||
UInt64 getMaxUnreservedFreeSpace() const;
|
||||
|
||||
Disks disks;
|
||||
DiskPtr getDisk(size_t i = 0) const { return disks[i]; }
|
||||
const Disks & getDisks() const { return disks; }
|
||||
|
||||
protected:
|
||||
Disks disks;
|
||||
const String name;
|
||||
};
|
||||
|
||||
using VolumePtr = std::shared_ptr<IVolume>;
|
||||
using Volumes = std::vector<VolumePtr>;
|
||||
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ namespace ErrorCodes
|
||||
extern const int FILE_ALREADY_EXISTS;
|
||||
extern const int CANNOT_SEEK_THROUGH_FILE;
|
||||
extern const int UNKNOWN_FORMAT;
|
||||
extern const int INCORRECT_DISK_INDEX;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -369,7 +370,16 @@ public:
|
||||
|
||||
UInt64 getSize() const override { return size; }
|
||||
|
||||
DiskPtr getDisk() const override { return disk; }
|
||||
DiskPtr getDisk(size_t i) const override
|
||||
{
|
||||
if (i != 0)
|
||||
{
|
||||
throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX);
|
||||
}
|
||||
return disk;
|
||||
}
|
||||
|
||||
Disks getDisks() const override { return {disk}; }
|
||||
|
||||
void update(UInt64 new_size) override
|
||||
{
|
||||
|
@ -7,6 +7,11 @@
|
||||
#include <Poco/Net/HTTPResponse.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace DB::ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace DB::S3
|
||||
{
|
||||
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
|
||||
@ -30,13 +35,16 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
|
||||
Aws::Client::ClientConfigurationPerRequest cfg;
|
||||
try
|
||||
{
|
||||
/// It should be just empty GET / request.
|
||||
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
/// It should be just empty GET request.
|
||||
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
session->sendRequest(request);
|
||||
|
||||
Poco::Net::HTTPResponse response;
|
||||
auto & response_body_stream = session->receiveResponse(response);
|
||||
|
||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
|
||||
throw Exception("Proxy resolver returned not OK status: " + response.getReason(), ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
String proxy_host;
|
||||
/// Read proxy host as string from response body.
|
||||
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
|
||||
|
@ -6,7 +6,7 @@ namespace DB::S3
|
||||
{
|
||||
/**
|
||||
* Proxy configuration where proxy host is obtained each time from specified endpoint.
|
||||
* For each request to S3 it makes GET request to specified endpoint and reads proxy host from a response body.
|
||||
* For each request to S3 it makes GET request to specified endpoint URL and reads proxy host from a response body.
|
||||
* Specified scheme and port added to obtained proxy host to form completed proxy URL.
|
||||
*/
|
||||
class ProxyResolverConfiguration : public ProxyConfiguration
|
||||
|
@ -37,13 +37,14 @@ namespace
|
||||
|
||||
void checkRemoveAccess(IDisk & disk) { disk.remove("test_acl"); }
|
||||
|
||||
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(const Poco::Util::AbstractConfiguration * proxy_resolver_config)
|
||||
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
|
||||
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
|
||||
{
|
||||
auto endpoint = Poco::URI(proxy_resolver_config->getString("endpoint"));
|
||||
auto proxy_scheme = proxy_resolver_config->getString("proxy_scheme");
|
||||
auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
|
||||
auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
|
||||
if (proxy_scheme != "http" && proxy_scheme != "https")
|
||||
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
|
||||
auto proxy_port = proxy_resolver_config->getUInt("proxy_port");
|
||||
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
|
||||
|
||||
LOG_DEBUG(
|
||||
&Logger::get("DiskS3"), "Configured proxy resolver: " << endpoint.toString() << ", Scheme: " << proxy_scheme << ", Port: " << proxy_port);
|
||||
@ -51,16 +52,17 @@ namespace
|
||||
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
|
||||
}
|
||||
|
||||
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(const Poco::Util::AbstractConfiguration * proxy_config)
|
||||
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
|
||||
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
|
||||
{
|
||||
std::vector<String> keys;
|
||||
proxy_config->keys(keys);
|
||||
proxy_config.keys(prefix, keys);
|
||||
|
||||
std::vector<Poco::URI> proxies;
|
||||
for (const auto & key : keys)
|
||||
if (startsWith(key, "uri"))
|
||||
{
|
||||
Poco::URI proxy_uri(proxy_config->getString(key));
|
||||
Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));
|
||||
|
||||
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
|
||||
throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
|
||||
@ -78,25 +80,23 @@ namespace
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const Poco::Util::AbstractConfiguration * config)
|
||||
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
if (!config->has("proxy"))
|
||||
if (!config.has(prefix + ".proxy"))
|
||||
return nullptr;
|
||||
|
||||
const auto * proxy_config = config->createView("proxy");
|
||||
|
||||
std::vector<String> config_keys;
|
||||
proxy_config->keys(config_keys);
|
||||
config.keys(prefix + ".proxy", config_keys);
|
||||
|
||||
if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
|
||||
{
|
||||
if (resolver_configs > 1)
|
||||
throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return getProxyResolverConfiguration(proxy_config->createView("resolver"));
|
||||
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
|
||||
}
|
||||
|
||||
return getProxyListConfiguration(proxy_config);
|
||||
return getProxyListConfiguration(prefix + ".proxy", config);
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,27 +107,25 @@ void registerDiskS3(DiskFactory & factory)
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
const Context & context) -> DiskPtr {
|
||||
const auto * disk_config = config.createView(config_prefix);
|
||||
|
||||
Poco::File disk{context.getPath() + "disks/" + name};
|
||||
disk.createDirectories();
|
||||
|
||||
Aws::Client::ClientConfiguration cfg;
|
||||
|
||||
S3::URI uri(Poco::URI(disk_config->getString("endpoint")));
|
||||
S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
|
||||
if (uri.key.back() != '/')
|
||||
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
cfg.endpointOverride = uri.endpoint;
|
||||
|
||||
auto proxy_config = getProxyConfiguration(disk_config);
|
||||
auto proxy_config = getProxyConfiguration(config_prefix, config);
|
||||
if (proxy_config)
|
||||
cfg.perRequestConfiguration = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
|
||||
|
||||
auto client = S3::ClientFactory::instance().create(
|
||||
cfg,
|
||||
disk_config->getString("access_key_id", ""),
|
||||
disk_config->getString("secret_access_key", ""));
|
||||
config.getString(config_prefix + ".access_key_id", ""),
|
||||
config.getString(config_prefix + ".secret_access_key", ""));
|
||||
|
||||
String metadata_path = context.getPath() + "disks/" + name + "/";
|
||||
|
||||
|
6
src/Disks/SingleDiskVolume.cpp
Normal file
6
src/Disks/SingleDiskVolume.cpp
Normal file
@ -0,0 +1,6 @@
|
||||
#include <Disks/SingleDiskVolume.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
}
|
26
src/Disks/SingleDiskVolume.h
Normal file
26
src/Disks/SingleDiskVolume.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <Disks/IVolume.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class SingleDiskVolume : public IVolume
|
||||
{
|
||||
public:
|
||||
SingleDiskVolume(const String & name_, DiskPtr disk): IVolume(name_, {disk})
|
||||
{
|
||||
}
|
||||
|
||||
ReservationPtr reserve(UInt64 bytes) override
|
||||
{
|
||||
return disks[0]->reserve(bytes);
|
||||
}
|
||||
|
||||
VolumeType getType() const override { return VolumeType::SINGLE_DISK; }
|
||||
|
||||
};
|
||||
|
||||
using VolumeSingleDiskPtr = std::shared_ptr<SingleDiskVolume>;
|
||||
|
||||
}
|
@ -55,7 +55,7 @@ StoragePolicy::StoragePolicy(
|
||||
std::set<String> disk_names;
|
||||
for (const auto & volume : volumes)
|
||||
{
|
||||
for (const auto & disk : volume->disks)
|
||||
for (const auto & disk : volume->getDisks())
|
||||
{
|
||||
if (disk_names.find(disk->getName()) != disk_names.end())
|
||||
throw Exception(
|
||||
@ -102,7 +102,7 @@ bool StoragePolicy::isDefaultPolicy() const
|
||||
if (volumes[0]->getName() != "default")
|
||||
return false;
|
||||
|
||||
const auto & disks = volumes[0]->disks;
|
||||
const auto & disks = volumes[0]->getDisks();
|
||||
if (disks.size() != 1)
|
||||
return false;
|
||||
|
||||
@ -117,7 +117,7 @@ Disks StoragePolicy::getDisks() const
|
||||
{
|
||||
Disks res;
|
||||
for (const auto & volume : volumes)
|
||||
for (const auto & disk : volume->disks)
|
||||
for (const auto & disk : volume->getDisks())
|
||||
res.push_back(disk);
|
||||
return res;
|
||||
}
|
||||
@ -130,17 +130,17 @@ DiskPtr StoragePolicy::getAnyDisk() const
|
||||
if (volumes.empty())
|
||||
throw Exception("StoragePolicy has no volumes. It's a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (volumes[0]->disks.empty())
|
||||
if (volumes[0]->getDisks().empty())
|
||||
throw Exception("Volume '" + volumes[0]->getName() + "' has no disks. It's a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return volumes[0]->disks[0];
|
||||
return volumes[0]->getDisks()[0];
|
||||
}
|
||||
|
||||
|
||||
DiskPtr StoragePolicy::getDiskByName(const String & disk_name) const
|
||||
{
|
||||
for (auto && volume : volumes)
|
||||
for (auto && disk : volume->disks)
|
||||
for (auto && disk : volume->getDisks())
|
||||
if (disk->getName() == disk_name)
|
||||
return disk;
|
||||
return {};
|
||||
@ -181,7 +181,7 @@ ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const
|
||||
DiskPtr max_disk;
|
||||
for (const auto & volume : volumes)
|
||||
{
|
||||
for (const auto & disk : volume->disks)
|
||||
for (const auto & disk : volume->getDisks())
|
||||
{
|
||||
auto avail_space = disk->getAvailableSpace();
|
||||
if (avail_space > max_space)
|
||||
@ -207,10 +207,10 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol
|
||||
throw Exception("New storage policy shall contain volumes of old one", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
std::unordered_set<String> new_disk_names;
|
||||
for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->disks)
|
||||
for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks())
|
||||
new_disk_names.insert(disk->getName());
|
||||
|
||||
for (const auto & disk : volume->disks)
|
||||
for (const auto & disk : volume->getDisks())
|
||||
if (new_disk_names.count(disk->getName()) == 0)
|
||||
throw Exception("New storage policy shall contain disks of old one", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
@ -222,7 +222,7 @@ size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const
|
||||
for (size_t i = 0; i < volumes.size(); ++i)
|
||||
{
|
||||
const auto & volume = volumes[i];
|
||||
for (const auto & disk : volume->disks)
|
||||
for (const auto & disk : volume->getDisks())
|
||||
if (disk->getName() == disk_ptr->getName())
|
||||
return i;
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/IVolume.h>
|
||||
#include <Disks/VolumeJBOD.h>
|
||||
#include <Disks/SingleDiskVolume.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Exception.h>
|
||||
|
@ -25,6 +25,8 @@ public:
|
||||
DiskSelectorPtr disk_selector
|
||||
);
|
||||
|
||||
VolumeType getType() const override { return VolumeType::JBOD; }
|
||||
|
||||
/// Next disk (round-robin)
|
||||
///
|
||||
/// - Used with policy for temporary data
|
||||
|
17
src/Disks/createVolume.cpp
Normal file
17
src/Disks/createVolume.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
#include "createVolume.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
VolumePtr createVolumeFromReservation(const ReservationPtr & reservation, VolumePtr other_volume)
|
||||
{
|
||||
if (other_volume->getType() == VolumeType::JBOD || other_volume->getType() == VolumeType::SINGLE_DISK)
|
||||
{
|
||||
/// Since reservation on JBOD chosies one of disks and makes reservation there, volume
|
||||
/// for such type of reservation will be with one disk.
|
||||
return std::make_shared<SingleDiskVolume>(other_volume->getName(), reservation->getDisk());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
}
|
12
src/Disks/createVolume.h
Normal file
12
src/Disks/createVolume.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <Disks/IVolume.h>
|
||||
#include <Disks/VolumeJBOD.h>
|
||||
#include <Disks/SingleDiskVolume.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
VolumePtr createVolumeFromReservation(const ReservationPtr & reservation, VolumePtr other_volume);
|
||||
|
||||
}
|
@ -5,6 +5,7 @@ PEERDIR(
|
||||
)
|
||||
|
||||
SRCS(
|
||||
createVolume.cpp
|
||||
DiskFactory.cpp
|
||||
DiskLocal.cpp
|
||||
DiskMemory.cpp
|
||||
@ -12,6 +13,7 @@ SRCS(
|
||||
IDisk.cpp
|
||||
IVolume.cpp
|
||||
registerDisks.cpp
|
||||
SingleDiskVolume.cpp
|
||||
StoragePolicy.cpp
|
||||
VolumeJBOD.cpp
|
||||
)
|
||||
|
@ -283,6 +283,8 @@ SRCS(
|
||||
rand.cpp
|
||||
randomPrintableASCII.cpp
|
||||
randomString.cpp
|
||||
randomStringUTF8.cpp
|
||||
randomFixedString.cpp
|
||||
regexpQuoteMeta.cpp
|
||||
registerFunctionsArithmetic.cpp
|
||||
registerFunctionsComparison.cpp
|
||||
|
@ -471,7 +471,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
argument_types.push_back(column.type);
|
||||
argument_names.push_back(column.name);
|
||||
}
|
||||
else if (identifier && node.name == "joinGet" && arg == 0)
|
||||
else if (identifier && (functionIsJoinGet(node.name) || functionIsDictGet(node.name)) && arg == 0)
|
||||
{
|
||||
auto table_id = IdentifierSemantic::extractDatabaseAndTable(*identifier);
|
||||
table_id = data.context.resolveStorageID(table_id, Context::ResolveOrdinary);
|
||||
@ -480,7 +480,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
ColumnWithTypeAndName column(
|
||||
ColumnConst::create(std::move(column_string), 1),
|
||||
std::make_shared<DataTypeString>(),
|
||||
data.getUniqueName("__joinGet"));
|
||||
data.getUniqueName("__" + node.name));
|
||||
data.addAction(ExpressionAction::addColumn(column));
|
||||
argument_types.push_back(column.type);
|
||||
argument_names.push_back(column.name);
|
||||
|
@ -586,7 +586,7 @@ VolumeJBODPtr Context::setTemporaryStorage(const String & path, const String & p
|
||||
shared->tmp_volume = tmp_policy->getVolume(0);
|
||||
}
|
||||
|
||||
if (shared->tmp_volume->disks.empty())
|
||||
if (shared->tmp_volume->getDisks().empty())
|
||||
throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
|
||||
|
||||
return shared->tmp_volume;
|
||||
|
@ -1025,6 +1025,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
chain.clear();
|
||||
};
|
||||
|
||||
if (storage)
|
||||
{
|
||||
query_analyzer.makeSetsForIndex(query.where());
|
||||
query_analyzer.makeSetsForIndex(query.prewhere());
|
||||
}
|
||||
|
||||
{
|
||||
ExpressionActionsChain chain(context);
|
||||
Names additional_required_columns_after_prewhere;
|
||||
|
@ -243,8 +243,6 @@ public:
|
||||
const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
|
||||
const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }
|
||||
|
||||
/// Create Set-s that we make from IN section to use index on them.
|
||||
void makeSetsForIndex(const ASTPtr & node);
|
||||
const PreparedSets & getPreparedSets() const { return prepared_sets; }
|
||||
|
||||
/// Tables that will need to be sent to remote servers for distributed query processing.
|
||||
@ -275,6 +273,9 @@ private:
|
||||
*/
|
||||
SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name);
|
||||
|
||||
/// Create Set-s that we make from IN section to use index on them.
|
||||
void makeSetsForIndex(const ASTPtr & node);
|
||||
|
||||
JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element);
|
||||
|
||||
const ASTSelectQuery * getAggregatingQuery() const;
|
||||
|
@ -309,12 +309,29 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
ASTSelectQuery & query = getSelectQuery();
|
||||
std::shared_ptr<TableJoin> table_join = joined_tables.makeTableJoin(query);
|
||||
|
||||
auto analyze = [&] (bool try_move_to_prewhere = true)
|
||||
ASTPtr row_policy_filter;
|
||||
if (storage)
|
||||
row_policy_filter = context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER);
|
||||
|
||||
auto analyze = [&] (bool try_move_to_prewhere)
|
||||
{
|
||||
syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect(
|
||||
query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage),
|
||||
options, joined_tables.tablesWithColumns(), required_result_column_names, table_join);
|
||||
|
||||
if (try_move_to_prewhere && storage && !row_policy_filter && query.where() && !query.prewhere() && !query.final())
|
||||
{
|
||||
/// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
|
||||
if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get()))
|
||||
{
|
||||
SelectQueryInfo current_info;
|
||||
current_info.query = query_ptr;
|
||||
current_info.syntax_analyzer_result = syntax_analyzer_result;
|
||||
|
||||
MergeTreeWhereOptimizer{current_info, *context, *merge_tree, syntax_analyzer_result->requiredSourceColumns(), log};
|
||||
}
|
||||
}
|
||||
|
||||
/// Save scalar sub queries's results in the query context
|
||||
if (!options.only_analyze && context->hasQueryContext())
|
||||
for (const auto & it : syntax_analyzer_result->getScalars())
|
||||
@ -365,7 +382,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
source_header = storage->getSampleBlockForColumns(required_columns);
|
||||
|
||||
/// Fix source_header for filter actions.
|
||||
auto row_policy_filter = context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER);
|
||||
if (row_policy_filter)
|
||||
{
|
||||
filter_info = std::make_shared<FilterInfo>();
|
||||
@ -378,10 +394,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);
|
||||
|
||||
/// Calculate structure of the result.
|
||||
result_header = getSampleBlockImpl(try_move_to_prewhere);
|
||||
result_header = getSampleBlockImpl();
|
||||
};
|
||||
|
||||
analyze();
|
||||
analyze(settings.optimize_move_to_prewhere);
|
||||
|
||||
bool need_analyze_again = false;
|
||||
if (analysis_result.prewhere_constant_filter_description.always_false || analysis_result.prewhere_constant_filter_description.always_true)
|
||||
@ -481,40 +497,8 @@ QueryPipeline InterpreterSelectQuery::executeWithProcessors()
|
||||
}
|
||||
|
||||
|
||||
Block InterpreterSelectQuery::getSampleBlockImpl(bool try_move_to_prewhere)
|
||||
Block InterpreterSelectQuery::getSampleBlockImpl()
|
||||
{
|
||||
auto & query = getSelectQuery();
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
/// Do all AST changes here, because actions from analysis_result will be used later in readImpl.
|
||||
|
||||
if (storage)
|
||||
{
|
||||
query_analyzer->makeSetsForIndex(query.where());
|
||||
query_analyzer->makeSetsForIndex(query.prewhere());
|
||||
|
||||
/// PREWHERE optimization.
|
||||
/// Turn off, if the table filter (row-level security) is applied.
|
||||
if (!context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER))
|
||||
{
|
||||
auto optimize_prewhere = [&](auto & merge_tree)
|
||||
{
|
||||
SelectQueryInfo current_info;
|
||||
current_info.query = query_ptr;
|
||||
current_info.syntax_analyzer_result = syntax_analyzer_result;
|
||||
current_info.sets = query_analyzer->getPreparedSets();
|
||||
|
||||
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
|
||||
if (settings.optimize_move_to_prewhere && try_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
|
||||
MergeTreeWhereOptimizer{current_info, *context, merge_tree,
|
||||
syntax_analyzer_result->requiredSourceColumns(), log};
|
||||
};
|
||||
|
||||
if (const auto * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
|
||||
optimize_prewhere(*merge_tree_data);
|
||||
}
|
||||
}
|
||||
|
||||
if (storage && !options.only_analyze)
|
||||
from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_ptr);
|
||||
|
||||
|
@ -106,7 +106,7 @@ private:
|
||||
|
||||
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
|
||||
|
||||
Block getSampleBlockImpl(bool try_move_to_prewhere);
|
||||
Block getSampleBlockImpl();
|
||||
|
||||
struct Pipeline
|
||||
{
|
||||
|
@ -38,16 +38,18 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
|
||||
if (functionIsInOrGlobalInOperator(func.name))
|
||||
{
|
||||
auto & ast = func.arguments->children.at(1);
|
||||
if (auto opt_name = tryGetIdentifierName(ast))
|
||||
if (!data.aliases.count(*opt_name))
|
||||
setIdentifierSpecial(ast);
|
||||
auto opt_name = tryGetIdentifierName(ast);
|
||||
if (opt_name && !data.aliases.count(*opt_name))
|
||||
setIdentifierSpecial(ast);
|
||||
}
|
||||
|
||||
// first argument of joinGet can be a table identifier
|
||||
if (func.name == "joinGet")
|
||||
// First argument of joinGet can be a table name, perhaps with a database.
|
||||
// First argument of dictGet can be a dictionary name, perhaps with a database.
|
||||
if (functionIsJoinGet(func.name) || functionIsDictGet(func.name))
|
||||
{
|
||||
auto & ast = func.arguments->children.at(0);
|
||||
if (auto opt_name = tryGetIdentifierName(ast))
|
||||
auto opt_name = tryGetIdentifierName(ast);
|
||||
if (opt_name && !data.aliases.count(*opt_name))
|
||||
setIdentifierSpecial(ast);
|
||||
}
|
||||
}
|
||||
|
@ -767,8 +767,13 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
const auto & settings = context.getSettingsRef();
|
||||
|
||||
const NameSet & source_columns_set = result.source_columns_set;
|
||||
result.analyzed_join = table_join;
|
||||
if (!result.analyzed_join) /// ExpressionAnalyzer expects some not empty object here
|
||||
|
||||
if (table_join)
|
||||
{
|
||||
result.analyzed_join = table_join;
|
||||
result.analyzed_join->resetCollected();
|
||||
}
|
||||
else /// TODO: remove. For now ExpressionAnalyzer expects some not empty object here
|
||||
result.analyzed_join = std::make_shared<TableJoin>();
|
||||
|
||||
if (remove_duplicates)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user