Merge branch 'master' into consistent_metadata

This commit is contained in:
alesapin 2020-05-21 18:54:52 +03:00
commit 87eab514fb
118 changed files with 1496 additions and 578 deletions

12
.arcignore Normal file
View File

@ -0,0 +1,12 @@
# .arcignore is the same as .gitignore but for Arc VCS.
# Arc VCS is a proprietary VCS in Yandex that is very similar to Git
# from the user perspective but with the following differences:
# 1. Data is stored in distributed object storage.
# 2. Local copy works via FUSE without downloading all the objects.
# For this reason, it is better suited for huge monorepositories that can be found in large companies (e.g. Yandex, Google).
# As ClickHouse developers, we don't use Arc as a VCS (we use Git).
# But the ClickHouse source code is also mirrored into the internal monorepository, and our colleagues use Arc.
# You can read more about Arc here: https://habr.com/en/company/yandex/blog/482926/
# Repository is synchronized without 3rd-party submodules.
contrib

View File

@ -9,7 +9,7 @@ Checks: '-*,
misc-unused-alias-decls,
misc-unused-parameters,
misc-unused-using-decls,
modernize-avoid-bind,
modernize-loop-convert,
modernize-make-shared,
@ -33,7 +33,7 @@ Checks: '-*,
performance-no-automatic-move,
performance-trivially-destructible,
performance-unnecessary-copy-initialization,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-size-empty,
@ -58,7 +58,7 @@ Checks: '-*,
readability-simplify-boolean-expr,
readability-inconsistent-declaration-parameter-name,
readability-identifier-naming,
bugprone-undelegated-constructor,
bugprone-argument-comment,
bugprone-bad-signal-to-kill-thread,
@ -102,7 +102,7 @@ Checks: '-*,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cert-dcl21-cpp,
cert-dcl50-cpp,
cert-env33-c,
@ -112,7 +112,7 @@ Checks: '-*,
cert-mem57-cpp,
cert-msc50-cpp,
cert-oop58-cpp,
google-build-explicit-make-pair,
google-build-namespaces,
google-default-arguments,
@ -121,9 +121,9 @@ Checks: '-*,
google-readability-avoid-underscore-in-googletest-name,
google-runtime-int,
google-runtime-operator,
hicpp-exception-baseclass,
clang-analyzer-core.CallAndMessage,
clang-analyzer-core.DivideZero,
clang-analyzer-core.NonNullParamChecker,

View File

@ -5,6 +5,7 @@ RUN apt-get --allow-unauthenticated update -y && apt-get install --yes wget gnup
RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 main" >> /etc/apt/sources.list
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
@ -17,6 +18,14 @@ RUN apt-get --allow-unauthenticated update -y \
apt-transport-https \
ca-certificates
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
RUN chmod +x dpkg-deb
RUN cp dpkg-deb /usr/bin
# Libraries from OS are only needed to test the "unbundled" build (that is not used in production).
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
@ -74,12 +83,6 @@ RUN apt-get --allow-unauthenticated update -y \
libldap2-dev
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
RUN chmod +x dpkg-deb
RUN cp dpkg-deb /usr/bin
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-10 /usr/bin/ld.lld

View File

@ -133,7 +133,7 @@ function run_tests
fi
# Delete old report files.
for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
for x in {test-times,wall-clock-times}.tsv
do
rm -v "$x" ||:
touch "$x"
@ -220,66 +220,127 @@ function get_profiles
# Build and analyze randomization distribution for all queries.
function analyze_queries
{
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||:
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
rm -rfv analyze ||:
mkdir analyze ||:
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
# Split the raw test output into files suitable for analysis.
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print)
do
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv"
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv"
done
unset IFS
# for each query run, prepare array of metrics from query log
clickhouse-local --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "right-query-log.tsv.columns")');
create table query_metrics engine File(TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv')
as select
test, query_index, 0 run, version,
[
-- server-reported time
query_duration_ms / toFloat64(1000)
, toFloat64(memory_usage)
-- client-reported time
, query_runs.time
] metrics
from (
select *, 0 version from left_query_log
union all
select *, 1 version from right_query_log
) query_logs
right join query_runs
using (query_id, version)
;
"
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
# We don't have arrayPermute(), so I have to make random permutations with
# `order by rand`, and it becomes really slow if I do it for more than one
# query. We also don't have lateral joins. So I just put all runs of each
# query into a separate file, and then compute randomization distribution
# for each file. I do this in parallel using GNU parallel.
query_index=1
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-queries.tsv" -print)
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
do
test_name=$(basename "$test_file" "-queries.tsv")
query_index=1
for query in $(cut -d' ' -f1 "$test_file" | sort | uniq)
do
query_prefix="$test_name.q$query_index"
query_index=$((query_index + 1))
grep -F "$query " "$test_file" > "$query_prefix.tmp"
printf "%s\0\n" \
"clickhouse-local \
--file \"$query_prefix.tmp\" \
--structure 'query text, run int, version UInt32, time float' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"$test_name-report.tsv\"" \
2>> analyze-errors.log \
>> analyze-commands.txt
done
file="analyze/q$query_index.tmp"
grep -F "$prefix " "analyze/query-run-metrics.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-reports.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
query_index=$((query_index + 1))
done
wait
unset IFS
parallel --null < analyze-commands.txt
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt
}
# Analyze results
function report
{
rm -r report ||:
mkdir report ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
cat analyze-errors.log >> report/errors.log ||:
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv') as
select *, metric_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
left array join ['server_time', 'memory', 'client_time'] as metric_name,
left, right, diff, stat_threshold
left join query_display_names
on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
@ -296,53 +357,54 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
reports.test,
query
from
(
select *,
replaceAll(_file, '-report.tsv', '') test
from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
) reports
left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
using test
;
test, query_index, query_display_name
from query_metric_stats
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
where metric_name = 'server_time'
order by test, query_index, metric_name
;
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- save all test runs as JSON for the new comparison page
create table all_query_funs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query, versions_runs[1] runs_left, versions_runs[2] runs_right
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query_display_name query,
versions_runs[1] runs_left, versions_runs[2] runs_right
from (
select
test, query,
test, query_index,
groupArrayInsertAt(runs, version) versions_runs
from (
select
replaceAll(_file, '-queries.tsv', '') test,
query, version,
groupArray(time) runs
from file('*-queries.tsv', TSV, 'query text, run int, version UInt32, time float')
group by test, query, version
test, query_index, version,
groupArray(metrics[1]) runs
from file('analyze/query-run-metrics.tsv', TSV,
'test text, query_index int, run int, version UInt8, metrics Array(float)')
group by test, query_index, version
)
group by test, query
)
group by test, query_index
) runs
left join query_display_names using (test, query_index)
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
order by abs(diff) desc;
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
order by stat_threshold desc;
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
select query, test from queries where unstable_show or changed_show
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
@ -350,23 +412,23 @@ create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
group by test having s > 0 order by s desc;
create table query_time engine Memory as select *
from file('client-times.tsv', TSV, 'test text, query text, client float, server float');
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, query
from query_time where p > 1.02 order by p desc;
select client, server, floor(client/server, 3) p, query_display_name
from query_time left join query_display_names using (test, query_index)
where p > 1.02 order by p desc;
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries,
sum(short) short_queries
from query_time full join queries
using test, query
count(*) queries, sum(short) short_queries
from query_time full join queries using (test, query_index)
group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
@ -378,40 +440,89 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
floor(real / queries, 3) avg_real_per_query,
floor(query_min, 3)
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
order by avg_real_per_query desc;
-- report for all queries page, only main metric
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query
from queries order by test, query;
stat_threshold, test, query_display_name
from queries order by test, query_display_name;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
# Prepare source data for metrics and flamegraphs for unstable queries.
for version in {right,left}
do
clickhouse-local --query "
do
rm -rf data
clickhouse-local --query "
create view queries_for_flamegraph as
select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
'query text, test text');
'test text, query_index int');
create view query_runs as
with 0 as left, 1 as right
select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float')
where version = $version
;
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select test, query_index, query_display_name, query_id
from query_runs
join queries_for_flamegraph on
query_runs.test = queries_for_flamegraph.test
and query_runs.query_index = queries_for_flamegraph.query_index
left join query_display_names on
query_runs.test = query_display_names.test
and query_runs.query_index = query_display_names.query_index
;
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-query-log.tsv.columns")');
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select
test, query_index, query_id,
ProfileEvents.Values value, ProfileEvents.Names metric
from query_log array join ProfileEvents
join unstable_query_runs using (query_id)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select
test, query_index, query_id,
v, n
from (
select
test, query_index, query_id,
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v
from query_log
join unstable_query_runs using (query_id)
)
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-trace-log.tsv.columns")');
@ -423,88 +534,64 @@ create view addresses_src as select *
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select query, query_id from query_log
where query in (select query from queries_for_flamegraph)
and query_id not like 'prewarm %'
;
create table unstable_query_log engine File(Vertical,
'unstable-query-log.$version.rep') as
select * from query_log
where query_id in (select query_id from unstable_query_runs);
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
from query_log array join ProfileEvents
where query_id in (select query_id from unstable_query_runs)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select v, n, query_id, query
from
(select
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
query,
query_id
from query_log
where query_id in (select query_id from unstable_query_runs))
array join n, v;
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
'unstable-run-traces.$version.rep') as
select
test, query_index, query_id,
count() value,
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric,
unstable_query_runs.query_id,
any(unstable_query_runs.query) query
from unstable_query_runs
join trace_log on trace_log.query_id = unstable_query_runs.query_id
group by unstable_query_runs.query_id, metric
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric
from trace_log
join unstable_query_runs using query_id
group by test, query_index, query_id, metric
order by count() desc
;
create table metric_devation engine File(TSVWithNamesAndTypes,
'metric-deviation.$version.rep') as
select query, floor((q[3] - q[1])/q[2], 3) d,
quantilesExact(0, 0.5, 1)(value) q, metric
from (select * from unstable_run_metrics
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
join queries_for_flamegraph using query
group by query, metric
having d > 0.5
order by query desc, d desc
-- first goes the key used to split the file with grep
select test, query_index, query_display_name,
d, q, metric
from (
select
test, query_index,
floor((q[3] - q[1])/q[2], 3) d,
quantilesExact(0, 0.5, 1)(value) q, metric
from (select * from unstable_run_metrics
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
group by test, query_index, metric
having d > 0.5
) metrics
left join unstable_query_runs using (test, query_index)
order by test, query_index, d desc
;
create table stacks engine File(TSV, 'stacks.$version.rep') as
select
query,
-- first goes the key used to split the file with grep
test, query_index, any(query_display_name),
arrayStringConcat(
arrayMap(x -> joinGet(addresses_join_$version, 'name', x),
arrayReverse(trace)
),
';'
) readable_trace,
count()
count() c
from trace_log
join unstable_query_runs using query_id
group by query, trace
group by test, query_index, trace
;
" 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
done
wait
# Create per-query flamegraphs and files with metrics
IFS=$'\n'
for version in {right,left}
do
for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq)
for query in $(cut -d' ' -f1,2,3 "stacks.$version.rep" | sort | uniq)
do
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
# Build separate .svg flamegraph for each query.
grep -F "$query " "stacks.$version.rep" \

View File

@ -119,5 +119,5 @@ done
dmesg -T > dmesg.log
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
cp compare.log /output

View File

@ -1,32 +1,37 @@
-- input is table(query text, run UInt32, version int, time float)
-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
select
floor(original_medians_array.time_by_version[1], 4) l,
floor(original_medians_array.time_by_version[2], 4) r,
floor((r - l) / l, 3) diff_percent,
floor(threshold / l, 3) threshold_percent,
query
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent,
arrayMap(x, y -> floor(x / y, 3), threshold, l) threshold_percent,
test, query
from
(
-- quantiles of randomization distributions
select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
select quantileExactForEach(0.999)(
arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
) threshold
---- uncomment to see what the distribution is really like
--, uniqExact(d) u
--, uniqExact(d.1) u
--, arraySort(x->x.1,
-- arrayZip(
-- (sumMap([d], [1]) as f).1,
-- (sumMap([d.1], [1]) as f).1,
-- f.2)) full_histogram
from
(
select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
-- make array 'random label' -> '[median metric]'
select virtual_run, groupArrayInsertAt(median_metrics, random_label) metrics_by_label
from (
select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
-- get [median metric] arrays among virtual runs, grouping by random label
select medianExactForEach(metrics) median_metrics, virtual_run, random_label
from (
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements
-- randomly relabel measurements
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label
from (
select time, number virtual_run
select metrics, number virtual_run
from
-- strip the query away before the join -- it might be several kB long;
(select time, run, version from table) no_query,
(select metrics, run, version from table) no_query,
-- duplicate input measurements into many virtual runs
numbers(1, 100000) nn
-- for each virtual run, randomly reorder measurements
@ -40,19 +45,19 @@ from
-- this select aggregates by virtual_run
) rd,
(
select groupArrayInsertAt(median_time, version) time_by_version
select groupArrayInsertAt(median_metrics, version) medians_by_version
from
(
select medianExact(time) median_time, version
select medianExactForEach(metrics) median_metrics, version
from table
group by version
) original_medians
) original_medians_array,
(
select any(query) query from table
select any(test) test, any(query) query from table
) any_query,
(
select throwIf(uniq(query) != 1) from table
select throwIf(uniq((test, query)) != 1) from table
) check_single_query -- this subselect checks that there is only one query in the input table;
-- written this way so that it is not optimized away (#10523)
;
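To make the randomization test above easier to follow, here is a minimal standalone C++ sketch of the same idea (not part of the repository; the sample timings, run count and seed are made up for illustration): pool the per-run measurements of both server versions, relabel them at random many times, and take the 0.999 quantile of the absolute difference of the group medians as the per-query significance threshold (the `stat_threshold` used by the report).

```
// Hypothetical example, not the script's code: approximates what eqmed.sql computes per query.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

static double median(std::vector<double> v)
{
    std::sort(v.begin(), v.end());
    const size_t n = v.size();
    return n % 2 ? v[n / 2] : 0.5 * (v[n / 2 - 1] + v[n / 2]);
}

int main()
{
    // Made-up per-run times (seconds) for the "left" and "right" builds of one query.
    std::vector<double> left  = {0.101, 0.103, 0.099, 0.102, 0.100, 0.104, 0.098};
    std::vector<double> right = {0.105, 0.102, 0.106, 0.104, 0.103, 0.107, 0.101};

    std::vector<double> pooled = left;
    pooled.insert(pooled.end(), right.begin(), right.end());

    std::mt19937 gen(42);
    std::vector<double> diffs;
    for (int virtual_run = 0; virtual_run < 10000; ++virtual_run)
    {
        // Randomly relabel the measurements, then split them into two fake "versions".
        std::shuffle(pooled.begin(), pooled.end(), gen);
        std::vector<double> a(pooled.begin(), pooled.begin() + left.size());
        std::vector<double> b(pooled.begin() + left.size(), pooled.end());
        diffs.push_back(std::abs(median(a) - median(b)));
    }

    // The 0.999 quantile of the randomization distribution is the significance threshold.
    std::sort(diffs.begin(), diffs.end());
    const double threshold = diffs[static_cast<size_t>(0.999 * (diffs.size() - 1))];
    const double observed = std::abs(median(left) - median(right));
    std::printf("observed |median diff| = %g, threshold = %g\n", observed, threshold);
}
```

The SQL achieves the random relabeling with `order by rand()` followed by `rowNumberInAllBlocks() % 2`, which is the same shuffle-then-split trick, repeated for each of the virtual runs produced by `numbers(1, 100000)`.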

View File

@ -29,6 +29,8 @@ parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS',
parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
args = parser.parse_args()
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
@ -141,19 +143,25 @@ test_queries = substitute_parameters(test_query_templates)
report_stage_end('substitute2')
for i, q in enumerate(test_queries):
for query_index, q in enumerate(test_queries):
query_prefix = f'{test_name}.query{query_index}'
# We have some crazy long queries (about 100kB), so trim them to a sane
# length.
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({query_index})'
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
try:
for conn_index, c in enumerate(connections):
res = c.execute(q, query_id = f'prewarm {0} {query_display_name}')
print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}')
prewarm_id = f'{query_prefix}.prewarm0'
res = c.execute(q, query_id = prewarm_id)
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except:
# If prewarm fails for some query -- skip it, and try to test the others.
# This might happen if the new test introduces some function that the
@ -170,13 +178,14 @@ for i, q in enumerate(test_queries):
start_seconds = time.perf_counter()
server_seconds = 0
for run in range(0, args.runs):
run_id = f'{query_prefix}.run{run}'
for conn_index, c in enumerate(connections):
res = c.execute(q)
print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}')
res = c.execute(q, query_id = run_id)
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
server_seconds += c.last_query.elapsed
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}')
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
report_stage_end('benchmark')

View File

@ -25,6 +25,9 @@ very_unstable_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad='#ffb0c0'
color_good='#b0d050'
header_template = """
<!DOCTYPE html>
<html>
@ -188,8 +191,8 @@ if args.report == 'main':
print(tableStart('Changes in performance'))
columns = [
'Old, s.', # 0
'New, s.', # 1
'Old, s', # 0
'New, s', # 1
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 2
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
@ -205,10 +208,10 @@ if args.report == 'main':
if int(row[4]):
if float(row[2]) < 0.:
faster_queries += 1
attrs[2] = 'style="background: #00ff00"'
attrs[2] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = 'style="background: #ff0000"'
attrs[2] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
@ -221,7 +224,7 @@ if args.report == 'main':
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client',
['Client time, s.', 'Server time, s.', 'Ratio', 'Query'],
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
slow_on_client_rows)
def print_unstable_queries():
@ -252,7 +255,7 @@ if args.report == 'main':
for r in unstable_rows:
if int(r[4]):
very_unstable_queries += 1
attrs[3] = 'style="background: #ffb0a0"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ''
@ -266,7 +269,7 @@ if args.report == 'main':
error_tests += len(run_error_rows)
printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
skipped_tests_rows = tsvRows('skipped-tests.tsv')
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
printSimpleTable('Tests with most unstable queries',
@ -281,13 +284,13 @@ if args.report == 'main':
columns = [
'Test', #0
'Wall clock time, s.', #1
'Total client time, s.', #2
'Wall clock time, s', #1
'Total client time, s', #2
'Total queries', #3
'Ignored short queries', #4
'Longest query<br>(sum for all runs), s.', #5
'Avg wall clock time<br>(sum for all runs), s.', #6
'Shortest query<br>(sum for all runs), s.', #7
'Longest query<br>(sum for all runs), s', #5
'Avg wall clock time<br>(sum for all runs), s', #6
'Shortest query<br>(sum for all runs), s', #7
]
print(tableStart('Test times'))
@ -300,13 +303,13 @@ if args.report == 'main':
if float(r[6]) > 1.5 * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[6] = 'style="background: #ffb0a0"'
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
if float(r[5]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[5] = 'style="background: #ffb0a0"'
attrs[5] = f'style="background: {color_bad}"'
else:
attrs[5] = ''
@ -320,9 +323,9 @@ if args.report == 'main':
print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="all-queries.html">All queries</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
</p>
</body>
</html>
@ -382,8 +385,8 @@ elif args.report == 'all-queries':
columns = [
# Changed #0
# Unstable #1
'Old, s.', #2
'New, s.', #3
'Old, s', #2
'New, s', #3
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
@ -399,21 +402,21 @@ elif args.report == 'all-queries':
attrs[1] = None
for r in rows:
if int(r[1]):
attrs[6] = 'style="background: #ffb0a0"'
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
if int(r[0]):
if float(r[4]) > 0.:
attrs[4] = 'style="background: #ffb0a0"'
attrs[4] = f'style="background: {color_bad}"'
else:
attrs[4] = 'style="background: #adbdff"'
attrs[4] = f'style="background: {color_good}"'
else:
attrs[4] = ''
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = 'style="background: #ffb0a0"'
attrs[3] = 'style="background: #ffb0a0"'
attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
attrs[3] = ''
@ -428,9 +431,9 @@ elif args.report == 'all-queries':
print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="report.html">Main report</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
</p>
</body>
</html>

View File

@ -7,7 +7,7 @@ Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
# If You Use Windows {#if-you-use-windows}
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
# If You Use a 32-bit System {#if-you-use-a-32-bit-system}

View File

@ -72,7 +72,7 @@ Examples:
kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
CREATE TABLE queue2 (
CREATE TABLE queue3 (
timestamp UInt64,
level String,
message String

View File

@ -28,9 +28,10 @@ There may be any number of space symbols between syntactical constructions (incl
## Comments {#comments}
ClickHouse supports both SQL-style and C-style comments.
SQL-style comments start with `--` and continue to the end of the line; the space after `--` can be omitted.
C-style comments run from `/*` to `*/` and can be multiline; spaces are not required either.
ClickHouse supports both SQL-style and C-style comments:
- SQL-style comments start with `--` and continue to the end of the line; the space after `--` can be omitted.
- C-style comments run from `/*` to `*/` and can be multiline; spaces are not required either.
## Keywords {#syntax-keywords}

View File

@ -174,7 +174,7 @@ Upd. Still waiting for the removal of the old code, which
### 2.3. Moving columnar ser/de from DataType to Column {#perenos-stolbtsovogo-serde-iz-datatype-v-column}
In the queue.
In the queue. Anton Popov.
### 2.4. Moving LowCardinality from DataType to Column. Adding ColumnSparse {#perevod-lowcardinality-iz-datatype-v-column-dobavlenie-columnsparse}
@ -977,10 +977,10 @@ Q2.
[Vitaly Baranov](https://github.com/vitlibar) and Denis Glazachev, Altinity. Requires 12.1.
### 12.6. Information about users and quotas in a system table {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
### 12.6. + Information about users and quotas in a system table {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
[Vitaly Baranov](https://github.com/vitlibar). Requires 12.1.
There is a pull request. Q2.
There is a pull request. Q2. Done.
## 13. Resource separation, multi-tenancy {#razdelenie-resursov-multi-tenancy}

View File

@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.9
nltk==3.5
nose==1.3.7
protobuf==3.12.0
protobuf==3.12.1
numpy==1.18.4
Pygments==2.5.2
pymdown-extensions==7.1

View File

@ -46,7 +46,7 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/
sudo yum install clickhouse-server clickhouse-client
```
You can also manually download and install packages from https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64.
You can also manually download and install packages from https://repo.yandex.ru/clickhouse/rpm/stable/x86_64.
### From Docker {#from-docker-image}

View File

@ -1,3 +1,7 @@
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
# The 'clickhouse' binary is a multi-purpose tool
# that contains multiple execution modes (client, server, etc.);
# each of them is built and linked as a separate library, defined below.

View File

@ -289,7 +289,7 @@ private:
connection_entries.emplace_back(std::make_shared<Entry>(
connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings))));
pool.scheduleOrThrowOnError(std::bind(&Benchmark::thread, this, connection_entries));
pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); });
}
}
catch (...)

View File

@ -485,7 +485,7 @@ private:
history_file = config().getString("history_file");
else
{
auto history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
if (history_file_from_env)
history_file = history_file_from_env;
else if (!home_path.empty())
@ -1480,7 +1480,7 @@ private:
"\033[1m↗\033[0m",
};
auto indicator = indicators[increment % 8];
const char * indicator = indicators[increment % 8];
if (!send_logs && written_progress_chars)
message << '\r';

View File

@ -51,7 +51,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
{
std::string prompt{"Password for user (" + user + "): "};
char buf[1000] = {};
if (auto result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
password = result;
}

View File

@ -442,7 +442,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
/// Collect all shards that contain partition piece number piece_number.
Strings piece_status_paths;
for (auto & shard : shards_with_partition)
for (const auto & shard : shards_with_partition)
{
ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second;
ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number];
@ -702,7 +702,7 @@ ASTPtr ClusterCopier::removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, new_columns);
if (auto indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
if (const auto * indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
new_columns_list->set(new_columns_list->indices, indices->clone());
new_query.replace(new_query.columns_list, new_columns_list);

View File

@ -94,7 +94,7 @@ void ClusterCopierApp::mainImpl()
StatusFile status_file(process_path + "/status");
ThreadStatus thread_status;
auto log = &logger();
auto * log = &logger();
LOG_INFO(log, "Starting clickhouse-copier ("
<< "id " << process_id << ", "
<< "host_id " << host_id << ", "

View File

@ -260,7 +260,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
return res;
res.is_remote = 1;
for (auto & replica : replicas)
for (const auto & replica : replicas)
{
if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
{
@ -270,7 +270,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
}
res.hostname_difference = std::numeric_limits<size_t>::max();
for (auto & replica : replicas)
for (const auto & replica : replicas)
{
size_t difference = getHostNameDifference(local_hostname, replica.host_name);
res.hostname_difference = std::min(difference, res.hostname_difference);

View File

@ -937,10 +937,10 @@ public:
if (typeid_cast<const DataTypeFixedString *>(&data_type))
return std::make_unique<FixedStringModel>(seed);
if (auto type = typeid_cast<const DataTypeArray *>(&data_type))
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));
if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
if (const auto * type = typeid_cast<const DataTypeNullable *>(&data_type))
return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));
throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -24,8 +24,8 @@ namespace
query.table_id.table_name = table_name;
query.columns = std::make_shared<ASTExpressionList>(',');
query.children.push_back(query.columns);
for (size_t i = 0; i < columns.size(); ++i)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(columns[i].name));
for (const auto & column : columns)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
std::stringstream ss;
IAST::FormatSettings settings(ss, true);

View File

@ -195,7 +195,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
std::vector<ReadBufferPtr> read_buffers;
std::vector<ReadBuffer *> read_buffers_raw_ptr;
auto cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
auto * cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
if (!cascade_buffer)
throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR);
@ -383,7 +383,7 @@ void HTTPHandler::processQuery(
{
auto push_memory_buffer_and_continue = [next_buffer = used_output.out_maybe_compressed] (const WriteBufferPtr & prev_buf)
{
auto prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
auto * prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
if (!prev_memory_buffer)
throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR);

View File

@ -28,7 +28,7 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string &
{
}
Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) // override
Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
{
LOG_TRACE(log, "HTTP Request for " << name << ". "
<< "Method: " << request.getMethod()
@ -40,7 +40,7 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand
for (auto & handler_factory : child_factories)
{
auto handler = handler_factory->createRequestHandler(request);
auto * handler = handler_factory->createRequestHandler(request);
if (handler != nullptr)
return handler;
}
@ -72,80 +72,98 @@ HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler
static inline auto createHandlersFactoryFromConfig(IServer & server, const std::string & name, const String & prefix)
{
auto main_handler_factory = new HTTPRequestHandlerFactoryMain(name);
auto main_handler_factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
try
Poco::Util::AbstractConfiguration::Keys keys;
server.config().keys(prefix, keys);
for (const auto & key : keys)
{
Poco::Util::AbstractConfiguration::Keys keys;
server.config().keys(prefix, keys);
if (!startsWith(key, "rule"))
throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
for (const auto & key : keys)
{
if (!startsWith(key, "rule"))
throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", "");
const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", "");
if (handler_type == "static")
main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key));
else if (handler_type == "dynamic_query_handler")
main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key));
else if (handler_type == "predefined_query_handler")
main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key));
else if (handler_type.empty())
throw Exception("Handler type in config is not specified here: " +
prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
else
throw Exception("Unknown handler type '" + handler_type +"' in config here: " +
prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER);
}
return main_handler_factory;
}
catch (...)
{
delete main_handler_factory;
throw;
if (handler_type == "static")
main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key));
else if (handler_type == "dynamic_query_handler")
main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key));
else if (handler_type == "predefined_query_handler")
main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key));
else if (handler_type.empty())
throw Exception("Handler type in config is not specified here: " +
prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
else
throw Exception("Unknown handler type '" + handler_type +"' in config here: " +
prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER);
}
return main_handler_factory.release();
}
static const auto ping_response_expression = "Ok.\n";
static const auto root_response_expression = "config://http_server_default_response";
static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(
IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
{
if (server.config().has("http_handlers"))
return createHandlersFactoryFromConfig(server, name, "http_handlers");
else
{
auto factory = (new HTTPRequestHandlerFactoryMain(name))
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
->attachStrictPath("/")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
->attachStrictPath("/ping")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>(server, "query"))->allowPostAndGetParamsRequest());
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
factory->addHandler(root_handler.release());
auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
factory->addHandler(ping_handler.release());
auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
factory->addHandler(replicas_status_handler.release());
auto query_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, "query");
query_handler->allowPostAndGetParamsRequest();
factory->addHandler(query_handler.release());
/// We check that the prometheus handler will be served on the current (default) port.
/// Otherwise it will be created separately, see below.
if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0)
factory->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
{
auto prometheus_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
factory->addHandler(prometheus_handler.release());
}
return factory;
return factory.release();
}
}
static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name)
{
return (new HTTPRequestHandlerFactoryMain(name))
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
->attachStrictPath("/")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
->attachStrictPath("/ping")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>(server))->allowPostAndGetParamsRequest());
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
factory->addHandler(root_handler.release());
auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
factory->addHandler(ping_handler.release());
auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
factory->addHandler(replicas_status_handler.release());
auto main_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>>(server);
main_handler->allowPostAndGetParamsRequest();
factory->addHandler(main_handler.release());
return factory.release();
}
Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name)
@ -155,9 +173,14 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As
else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory")
return createInterserverHTTPHandlerFactory(server, name);
else if (name == "PrometheusHandler-factory")
return (new HTTPRequestHandlerFactoryMain(name))->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
{
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
factory->addHandler(handler.release());
return factory.release();
}
throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -46,7 +46,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next())
{
auto & table = iterator->table();
const auto & table = iterator->table();
StorageReplicatedMergeTree * table_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());
if (!table_replicated)

View File

@ -405,6 +405,9 @@
</prometheus>
-->
<!-- Lazy system.*_log table creation -->
<!-- <system_tables_lazy_load>false</system_tables_lazy_load> -->
<!-- Query log. Used only for queries with setting log_queries = 1. -->
<query_log>
<!-- What table to insert data into. If the table does not exist, it will be created.

View File

@ -4,6 +4,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnNullable.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -53,13 +54,13 @@ protected:
static void initFlag(AggregateDataPtr place) noexcept
{
if (result_is_nullable)
if constexpr (result_is_nullable)
place[0] = 0;
}
static void setFlag(AggregateDataPtr place) noexcept
{
if (result_is_nullable)
if constexpr (result_is_nullable)
place[0] = 1;
}
@ -72,7 +73,7 @@ public:
AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: IAggregateFunctionHelper<Derived>(arguments, params), nested_function{nested_function_}
{
if (result_is_nullable)
if constexpr (result_is_nullable)
prefix_size = nested_function->alignOfData();
else
prefix_size = 0;
@ -128,7 +129,7 @@ public:
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
bool flag = getFlag(place);
if (result_is_nullable)
if constexpr (result_is_nullable)
writeBinary(flag, buf);
if (flag)
nested_function->serialize(nestedPlace(place), buf);
@ -137,7 +138,7 @@ public:
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
bool flag = 1;
if (result_is_nullable)
if constexpr (result_is_nullable)
readBinary(flag, buf);
if (flag)
{
@ -148,7 +149,7 @@ public:
void insertResultInto(AggregateDataPtr place, IColumn & to) const override
{
if (result_is_nullable)
if constexpr (result_is_nullable)
{
ColumnNullable & to_concrete = assert_cast<ColumnNullable &>(to);
if (getFlag(place))
@ -194,13 +195,26 @@ public:
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
if (!column->isNullAt(row_num))
{
this->setFlag(place);
const IColumn * nested_column = &column->getNestedColumn();
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
const UInt8 * null_map = column->getNullMapData().data();
this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena);
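/// Mark the result as non-NULL only when at least one row in the batch is not NULL;
/// memoryIsByte checks whether the entire null map consists of 1s (i.e. every row is NULL).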
if constexpr (result_is_nullable)
if (!memoryIsByte(null_map, batch_size, 1))
this->setFlag(place);
}
};

View File

@ -20,11 +20,72 @@ struct AggregateFunctionSumData
{
T sum{};
void add(T value)
void ALWAYS_INLINE add(T value)
{
sum += value;
}
/// Vectorized version
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
/// The compiler cannot unroll this loop, so do it manually.
/// (at least for floats, most likely due to the lack of -fassociative-math)
/// Use something around the number of SSE registers * the number of elements that fit in a register.
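/// For an 8-byte T such as Float64 this gives 16 partial sums; for a 4-byte T it gives 32.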
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
partial_sums[i] += ptr[i];
ptr += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
while (ptr < end)
{
sum += *ptr;
++ptr;
}
}
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
partial_sums[i] += ptr[i];
ptr += unroll_count;
null_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
while (ptr < end)
{
if (!*null_map)
sum += *ptr;
++ptr;
++null_map;
}
}
void merge(const AggregateFunctionSumData & rhs)
{
sum += rhs.sum;
@ -55,21 +116,95 @@ struct AggregateFunctionSumKahanData
T sum{};
T compensation{};
void add(T value)
template <typename Value>
void ALWAYS_INLINE addImpl(Value value, T & out_sum, T & out_compensation)
{
auto compensated_value = value - compensation;
auto new_sum = sum + compensated_value;
compensation = (new_sum - sum) - compensated_value;
sum = new_sum;
auto compensated_value = value - out_compensation;
auto new_sum = out_sum + compensated_value;
out_compensation = (new_sum - out_sum) - compensated_value;
out_sum = new_sum;
}
void ALWAYS_INLINE add(T value)
{
addImpl(value, sum, compensation);
}
/// Vectorized version
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
/// A smaller unroll factor than in the ordinary sum, because the algorithm is more complicated
/// and unrolling the loop too aggressively is questionable.
/// But this is just a guess.
constexpr size_t unroll_count = 4;
T partial_sums[unroll_count]{};
T partial_compensations[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
ptr += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);
while (ptr < end)
{
addImpl(*ptr, sum, compensation);
++ptr;
}
}
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
constexpr size_t unroll_count = 4;
T partial_sums[unroll_count]{};
T partial_compensations[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
ptr += unroll_count;
null_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);
while (ptr < end)
{
if (!*null_map)
addImpl(*ptr, sum, compensation);
++ptr;
++null_map;
}
}
void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation)
{
auto raw_sum = to_sum + from_sum;
auto rhs_compensated = raw_sum - to_sum;
/// Kahan summation is tricky because it depends on non-associativity of float arithmetic.
/// Do not simplify this expression if you are not sure.
auto compensations = ((from_sum - rhs_compensated) + (to_sum - (raw_sum - rhs_compensated))) + compensation + from_compensation;
to_sum = raw_sum + compensations;
to_compensation = compensations - (to_sum - raw_sum);
}
void merge(const AggregateFunctionSumKahanData & rhs)
{
auto raw_sum = sum + rhs.sum;
auto rhs_compensated = raw_sum - sum;
auto compensations = ((rhs.sum - rhs_compensated) + (sum - (raw_sum - rhs_compensated))) + compensation + rhs.compensation;
sum = raw_sum + compensations;
compensation = compensations - (sum - raw_sum);
mergeImpl(sum, compensation, rhs.sum, rhs.compensation);
}
void write(WriteBuffer & buf) const
@ -141,6 +276,20 @@ public:
this->data(place).add(column.getData()[row_num]);
}
/// Vectorized version when there is no GROUP BY keys.
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).addMany(column.getData().data(), batch_size);
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
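For the Kahan variant above, which keeps a compensation term per partial accumulator, here is a short demonstration of what that compensation buys compared with a naive float accumulator. This is generic Kahan summation, not ClickHouse code, and the 1e-4f / ten-million-additions workload is only an illustrative choice:

#include <iomanip>
#include <iostream>

/// Kahan (compensated) summation: `compensation` accumulates the low-order bits
/// lost when a small value is added to a large running sum.
struct KahanSum
{
    float sum = 0.0f;
    float compensation = 0.0f;

    void add(float value)
    {
        float compensated_value = value - compensation;
        float new_sum = sum + compensated_value;
        compensation = (new_sum - sum) - compensated_value;
        sum = new_sum;
    }
};

int main()
{
    float naive = 0.0f;
    KahanSum kahan;

    /// Adding 1e-4 ten million times: the result should be 1000
    /// (up to the tiny representation error of 1e-4f).
    for (int i = 0; i < 10'000'000; ++i)
    {
        naive += 1e-4f;
        kahan.add(1e-4f);
    }

    std::cout << std::setprecision(8)
              << "naive: " << naive << ", kahan: " << kahan.sum << '\n';
    /// The naive float sum accumulates rounding error and ends up visibly off,
    /// while the compensated sum stays close to 1000.
}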

View File

@ -145,6 +145,11 @@ public:
*/
virtual void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
/** The same for single place when need to aggregate only filtered data.
*/
virtual void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0;
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -201,6 +206,14 @@ public:
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override
{
for (size_t i = 0; i < batch_size; ++i)
if (!null_map[i])
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
void addBatchArray(
size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
const override
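The new pure virtual addBatchSinglePlaceNotNull gets its row-by-row default in the CRTP helper shown above (IAggregateFunctionHelper in ClickHouse), so every existing aggregate function keeps working while hot ones such as sum override it with a vectorized loop. A condensed sketch of that dispatch pattern, with hypothetical names (IAgg, AggHelper, Sum) rather than the real interfaces:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

/// The helper is templated on the concrete function (CRTP), so the generic per-row
/// default calls Derived::add() directly instead of making a virtual call per row.
struct IAgg
{
    virtual ~IAgg() = default;
    virtual void addBatchNotNull(const double * values, const uint8_t * null_map, size_t n) = 0;
};

template <typename Derived>
struct AggHelper : IAgg
{
    void addBatchNotNull(const double * values, const uint8_t * null_map, size_t n) override
    {
        for (size_t i = 0; i < n; ++i)
            if (!null_map[i])
                static_cast<Derived *>(this)->add(values[i]);  /// resolved at compile time
    }
};

struct Sum : AggHelper<Sum>
{
    double sum = 0;
    void add(double value) { sum += value; }

    /// A concrete function may still override the batch method with a tighter loop,
    /// which is what AggregateFunctionSum does with addManyNotNull.
    void addBatchNotNull(const double * values, const uint8_t * null_map, size_t n) override
    {
        for (size_t i = 0; i < n; ++i)
            sum += null_map[i] ? 0.0 : values[i];
    }
};

int main()
{
    std::vector<double> values{1, 2, 3, 4};
    std::vector<uint8_t> nulls{0, 1, 0, 0};
    Sum s;
    s.addBatchNotNull(values.data(), nulls.data(), values.size());
    std::cout << s.sum << '\n';  /// prints 8
}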

View File

@ -56,8 +56,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(2);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n";
y = std::move(mut);
}
@ -75,8 +75,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(3);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n";
y = std::move(mut);
}

View File

@ -75,8 +75,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(2);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n";
y = std::move(mut);
}
@ -94,8 +94,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(3);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << ", " << mut.get() << "\n";
y = std::move(mut);
}
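The two COW test diffs above stop printing y->use_count() and y.get(): after IColumn::mutate(std::move(y)) the pointer y has been moved from, so it no longer owns the column and reading through it is invalid. The same rule illustrated with a plain std::shared_ptr (a generic C++ example, not the COW machinery itself):

#include <iostream>
#include <memory>

int main()
{
    auto x = std::make_shared<int>(42);
    auto y = x;                                        /// two owners of the same object
    std::cout << "before: " << x.use_count() << '\n';  /// 2

    auto mut = std::move(y);                           /// y is now an empty (moved-from) pointer
    std::cout << "after: " << x.use_count() << ", " << mut.use_count() << '\n';  /// 2, 2

    /// y.use_count() here is 0 and y-> would be undefined behaviour, which is why
    /// the test no longer prints anything about y after the move.
    std::cout << (y ? "y still owns" : "y is empty") << '\n';
}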

View File

@ -52,6 +52,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
M(SettingUInt64, min_insert_block_size_rows_for_materialized_views, 0, "Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows)", 0) \
M(SettingUInt64, min_insert_block_size_bytes_for_materialized_views, 0, "Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes)", 0) \
M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \
M(SettingUInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \

View File

@ -40,10 +40,20 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
/// We need special context for materialized views insertions
if (!dependencies.empty())
{
views_context = std::make_unique<Context>(context);
select_context = std::make_unique<Context>(context);
insert_context = std::make_unique<Context>(context);
const auto & insert_settings = insert_context->getSettingsRef();
// Do not deduplicate insertions into MV if the main insertion is Ok
if (disable_deduplication_for_children)
views_context->setSetting("insert_deduplicate", false);
insert_context->setSetting("insert_deduplicate", false);
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed)
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);
if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed)
insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value);
}
for (const auto & database_table : dependencies)
@ -67,7 +77,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
insert->table_id = inner_table_id;
/// Get list of columns we get from select query.
auto header = InterpreterSelectQuery(query, *views_context, SelectQueryOptions().analyze())
auto header = InterpreterSelectQuery(query, *select_context, SelectQueryOptions().analyze())
.getSampleBlock();
/// Insert only columns returned by select.
@ -81,14 +91,14 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
insert->columns = std::move(list);
ASTPtr insert_query_ptr(insert.release());
InterpreterInsertQuery interpreter(insert_query_ptr, *views_context);
InterpreterInsertQuery interpreter(insert_query_ptr, *insert_context);
BlockIO io = interpreter.execute();
out = io.out;
}
else if (dynamic_cast<const StorageLiveView *>(dependent_table.get()))
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *views_context, ASTPtr(), true);
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr(), true);
else
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *views_context, ASTPtr());
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr());
views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr});
}
@ -258,7 +268,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n
/// but it will contain single block (that is INSERT-ed into main table).
/// InterpreterSelectQuery will do processing of alias columns.
Context local_context = *views_context;
Context local_context = *select_context;
local_context.addViewSource(
StorageValues::create(
storage->getStorageID(), storage->getColumns(), block, storage->getVirtuals()));

View File

@ -44,7 +44,8 @@ private:
};
std::vector<ViewInfo> views;
std::unique_ptr<Context> views_context;
std::unique_ptr<Context> select_context;
std::unique_ptr<Context> insert_context;
void process(const Block & block, size_t view_num);
};

View File

@ -376,8 +376,10 @@ void registerDataTypeString(DataTypeFactory & factory)
/// These synonyms are added for compatibility.
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("NCHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("NVARCHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle
factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive);

View File

@ -7,6 +7,11 @@
#include <Poco/Net/HTTPResponse.h>
#include <common/logger_useful.h>
namespace DB::ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB::S3
{
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
@ -30,13 +35,16 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
Aws::Client::ClientConfigurationPerRequest cfg;
try
{
/// It should be just empty GET / request.
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_1_1);
/// It should be just empty GET request.
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
session->sendRequest(request);
Poco::Net::HTTPResponse response;
auto & response_body_stream = session->receiveResponse(response);
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
throw Exception("Proxy resolver returned not OK status: " + response.getReason(), ErrorCodes::BAD_ARGUMENTS);
String proxy_host;
/// Read proxy host as string from response body.
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);

View File

@ -6,7 +6,7 @@ namespace DB::S3
{
/**
* Proxy configuration where proxy host is obtained each time from specified endpoint.
* For each request to S3 it makes GET request to specified endpoint and reads proxy host from a response body.
* For each request to S3 it makes GET request to specified endpoint URL and reads proxy host from a response body.
* The specified scheme and port are added to the obtained proxy host to form the complete proxy URL.
*/
class ProxyResolverConfiguration : public ProxyConfiguration
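Since the resolver endpoint now carries a path (see http://resolver:8080/hostname in the test config further below), the request built above has to be an explicit GET to endpoint.getPath() rather than the bare HTTP/1.1 default. A trimmed-down sketch of the same round trip with Poco, outside the S3 client; the endpoint URL and the hard-coded port 8888 are illustrative values taken from the test configuration, not from this class:

#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/StreamCopier.h>
#include <Poco/URI.h>
#include <iostream>
#include <stdexcept>
#include <string>

int main()
{
    /// Hypothetical resolver endpoint; the real one comes from <resolver><endpoint>.
    Poco::URI endpoint("http://resolver:8080/hostname");

    Poco::Net::HTTPClientSession session(endpoint.getHost(), endpoint.getPort());
    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(),
                                   Poco::Net::HTTPRequest::HTTP_1_1);
    session.sendRequest(request);

    Poco::Net::HTTPResponse response;
    std::istream & body = session.receiveResponse(response);
    if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
        throw std::runtime_error("Proxy resolver returned not OK status: " + response.getReason());

    std::string proxy_host;
    Poco::StreamCopier::copyToString(body, proxy_host);  /// the body is just the host name

    /// Scheme and port come from the config (<proxy_scheme>, <proxy_port>).
    std::cout << "proxy URL: http://" << proxy_host << ":8888" << '\n';
}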

View File

@ -37,13 +37,14 @@ namespace
void checkRemoveAccess(IDisk & disk) { disk.remove("test_acl"); }
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(const Poco::Util::AbstractConfiguration * proxy_resolver_config)
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
{
auto endpoint = Poco::URI(proxy_resolver_config->getString("endpoint"));
auto proxy_scheme = proxy_resolver_config->getString("proxy_scheme");
auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
auto proxy_port = proxy_resolver_config->getUInt("proxy_port");
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
LOG_DEBUG(
&Logger::get("DiskS3"), "Configured proxy resolver: " << endpoint.toString() << ", Scheme: " << proxy_scheme << ", Port: " << proxy_port);
@ -51,16 +52,17 @@ namespace
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
}
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(const Poco::Util::AbstractConfiguration * proxy_config)
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
{
std::vector<String> keys;
proxy_config->keys(keys);
proxy_config.keys(prefix, keys);
std::vector<Poco::URI> proxies;
for (const auto & key : keys)
if (startsWith(key, "uri"))
{
Poco::URI proxy_uri(proxy_config->getString(key));
Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
@ -78,25 +80,23 @@ namespace
return nullptr;
}
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const Poco::Util::AbstractConfiguration * config)
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
{
if (!config->has("proxy"))
if (!config.has(prefix + ".proxy"))
return nullptr;
const auto * proxy_config = config->createView("proxy");
std::vector<String> config_keys;
proxy_config->keys(config_keys);
config.keys(prefix + ".proxy", config_keys);
if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
{
if (resolver_configs > 1)
throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS);
return getProxyResolverConfiguration(proxy_config->createView("resolver"));
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
}
return getProxyListConfiguration(proxy_config);
return getProxyListConfiguration(prefix + ".proxy", config);
}
}
@ -107,27 +107,25 @@ void registerDiskS3(DiskFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const Context & context) -> DiskPtr {
const auto * disk_config = config.createView(config_prefix);
Poco::File disk{context.getPath() + "disks/" + name};
disk.createDirectories();
Aws::Client::ClientConfiguration cfg;
S3::URI uri(Poco::URI(disk_config->getString("endpoint")));
S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
if (uri.key.back() != '/')
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
cfg.endpointOverride = uri.endpoint;
auto proxy_config = getProxyConfiguration(disk_config);
auto proxy_config = getProxyConfiguration(config_prefix, config);
if (proxy_config)
cfg.perRequestConfiguration = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
auto client = S3::ClientFactory::instance().create(
cfg,
disk_config->getString("access_key_id", ""),
disk_config->getString("secret_access_key", ""));
config.getString(config_prefix + ".access_key_id", ""),
config.getString(config_prefix + ".secret_access_key", ""));
String metadata_path = context.getPath() + "disks/" + name + "/";

View File

@ -44,7 +44,7 @@ namespace
{
template <typename Polygon, typename PointInPolygonImpl>
ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, Polygon & polygon)
UInt8 callPointInPolygonImplWithPool(Float64 x, Float64 y, Polygon & polygon)
{
using Pool = ObjectPoolMap<PointInPolygonImpl, std::string>;
/// C++11 has thread-safe function-local statics on most modern compilers.
@ -63,19 +63,19 @@ ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, P
std::string serialized_polygon = serialize(polygon);
auto impl = known_polygons.get(serialized_polygon, factory);
return pointInPolygon(x, y, *impl);
return impl->contains(x, y);
}
template <typename Polygon, typename PointInPolygonImpl>
ColumnPtr callPointInPolygonImpl(const IColumn & x, const IColumn & y, Polygon & polygon)
UInt8 callPointInPolygonImpl(Float64 x, Float64 y, Polygon & polygon)
{
PointInPolygonImpl impl(polygon);
return pointInPolygon(x, y, impl);
return impl.contains(x, y);
}
}
template <typename PointInPolygonImpl, bool use_object_pool>
template <typename PointInConstPolygonImpl, typename PointInNonConstPolygonImpl>
class FunctionPointInPolygon : public IFunction
{
public:
@ -91,7 +91,8 @@ public:
static FunctionPtr create(const Context & context)
{
return std::make_shared<FunctionPointInPolygon<PointInPolygonImpl, use_object_pool>>(context.getSettingsRef().validate_polygons);
return std::make_shared<FunctionPointInPolygon<PointInConstPolygonImpl, PointInNonConstPolygonImpl>>(
context.getSettingsRef().validate_polygons);
}
String getName() const override
@ -116,74 +117,192 @@ public:
throw Exception("Too few arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION);
}
auto get_message_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); };
for (size_t i = 1; i < arguments.size(); ++i)
auto validate_tuple = [this](size_t i, const DataTypeTuple * tuple)
{
const auto * array = checkAndGetDataType<DataTypeArray>(arguments[i].get());
if (array == nullptr && i != 1)
throw Exception(get_message_prefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * tuple = checkAndGetDataType<DataTypeTuple>(array ? array->getNestedType().get() : arguments[i].get());
if (tuple == nullptr)
throw Exception(get_message_prefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(getMessagePrefix(i) + " must contain a tuple", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const DataTypes & elements = tuple->getElements();
if (elements.size() != 2)
throw Exception(get_message_prefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS);
throw Exception(getMessagePrefix(i) + " must have exactly two elements", ErrorCodes::BAD_ARGUMENTS);
for (auto j : ext::range(0, elements.size()))
{
if (!isNativeNumber(elements[j]))
{
throw Exception(get_message_prefix(i) + " must contains numeric tuple at position " + toString(j + 1),
throw Exception(getMessagePrefix(i) + " must contain numeric tuple at position " + toString(j + 1),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
};
validate_tuple(0, checkAndGetDataType<DataTypeTuple>(arguments[0].get()));
if (arguments.size() == 2)
{
const auto * array = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (array == nullptr)
throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * nested_array = checkAndGetDataType<DataTypeArray>(array->getNestedType().get());
if (nested_array != nullptr)
{
array = nested_array;
}
validate_tuple(1, checkAndGetDataType<DataTypeTuple>(array->getNestedType().get()));
}
else
{
for (size_t i = 1; i < arguments.size(); i++)
{
const auto * array = checkAndGetDataType<DataTypeArray>(arguments[i].get());
if (array == nullptr)
throw Exception(getMessagePrefix(i) + " must contain an array of tuples",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
validate_tuple(i, checkAndGetDataType<DataTypeTuple>(array->getNestedType().get()));
}
}
return std::make_shared<DataTypeUInt8>();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
const IColumn * point_col = block.getByPosition(arguments[0]).column.get();
const auto * const_tuple_col = checkAndGetColumn<ColumnConst>(point_col);
if (const_tuple_col)
point_col = &const_tuple_col->getDataColumn();
const auto * tuple_col = checkAndGetColumn<ColumnTuple>(point_col);
const auto * tuple_col = checkAndGetColumn<ColumnTuple>(point_col);
if (!tuple_col)
throw Exception("First argument for function " + getName() + " must be constant array of tuples.",
ErrorCodes::ILLEGAL_COLUMN);
auto & result_column = block.safeGetByPosition(result).column;
const auto & tuple_columns = tuple_col->getColumns();
result_column = executeForType(*tuple_columns[0], *tuple_columns[1], block, arguments);
if (const_tuple_col)
const IColumn * poly_col = block.getByPosition(arguments[1]).column.get();
const auto * const_poly_col = checkAndGetColumn<ColumnConst>(poly_col);
bool point_is_const = const_tuple_col != nullptr;
bool poly_is_const = const_poly_col != nullptr;
auto call_impl = poly_is_const
? callPointInPolygonImplWithPool<Polygon, PointInConstPolygonImpl>
: callPointInPolygonImpl<Polygon, PointInNonConstPolygonImpl>;
size_t size = point_is_const && poly_is_const ? 1 : input_rows_count;
auto execution_result = ColumnVector<UInt8>::create(size);
auto & data = execution_result->getData();
Polygon polygon;
for (auto i : ext::range(0, size))
{
if (!poly_is_const || i == 0)
{
polygon = parsePolygon(block, arguments, i);
}
size_t point_index = point_is_const ? 0 : i;
data[i] = call_impl(tuple_columns[0]->getFloat64(point_index), tuple_columns[1]->getFloat64(point_index), polygon);
}
auto & result_column = block.safeGetByPosition(result).column;
result_column = std::move(execution_result);
if (point_is_const && poly_is_const)
result_column = ColumnConst::create(result_column, const_tuple_col->size());
}
private:
bool validate;
ColumnPtr executeForType(const IColumn & x, const IColumn & y, Block & block, const ColumnNumbers & arguments)
std::string getMessagePrefix(size_t i) const
{
return "Argument " + toString(i + 1) + " for function " + getName();
}
Polygon parsePolygonFromSingleColumn(Block & block, const ColumnNumbers & arguments, size_t i) const
{
const auto & poly = block.getByPosition(arguments[1]).column.get();
const auto * column_const = checkAndGetColumn<ColumnConst>(poly);
const auto * array_col =
column_const ? checkAndGetColumn<ColumnArray>(column_const->getDataColumn()) : checkAndGetColumn<ColumnArray>(poly);
if (!array_col)
throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples",
ErrorCodes::ILLEGAL_COLUMN);
const auto * nested_array_col = checkAndGetColumn<ColumnArray>(array_col->getData());
const auto & tuple_data = nested_array_col ? nested_array_col->getData() : array_col->getData();
const auto & tuple_col = checkAndGetColumn<ColumnTuple>(tuple_data);
if (!tuple_col)
throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples",
ErrorCodes::ILLEGAL_COLUMN);
const auto & tuple_columns = tuple_col->getColumns();
const auto & x_column = tuple_columns[0];
const auto & y_column = tuple_columns[1];
auto parse_polygon_part = [&x_column, &y_column](auto & container, size_t l, size_t r)
{
for (auto j : ext::range(l, r))
{
CoordinateType x_coord = x_column->getFloat64(j);
CoordinateType y_coord = y_column->getFloat64(j);
container.push_back(Point(x_coord, y_coord));
}
};
Polygon polygon;
if (nested_array_col)
{
for (auto j : ext::range(array_col->getOffsets()[i - 1], array_col->getOffsets()[i]))
{
size_t l = nested_array_col->getOffsets()[j - 1];
size_t r = nested_array_col->getOffsets()[j];
if (polygon.outer().empty())
{
parse_polygon_part(polygon.outer(), l, r);
}
else
{
polygon.inners().emplace_back();
parse_polygon_part(polygon.inners().back(), l, r);
}
}
}
else
{
size_t l = array_col->getOffsets()[i - 1];
size_t r = array_col->getOffsets()[i];
parse_polygon_part(polygon.outer(), l, r);
}
return polygon;
}
Polygon parsePolygonFromMultipleColumns(Block & block, const ColumnNumbers & arguments, size_t) const
{
Polygon polygon;
auto get_message_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); };
for (size_t i = 1; i < arguments.size(); ++i)
{
const auto * const_col = checkAndGetColumn<ColumnConst>(block.getByPosition(arguments[i]).column.get());
const auto * array_col = const_col ? checkAndGetColumn<ColumnArray>(&const_col->getDataColumn()) : nullptr;
if (!const_col)
throw Exception("Multi-argument version of function " + getName() + " works only with const polygon",
ErrorCodes::BAD_ARGUMENTS);
const auto * array_col = checkAndGetColumn<ColumnArray>(&const_col->getDataColumn());
const auto * tuple_col = array_col ? checkAndGetColumn<ColumnTuple>(&array_col->getData()) : nullptr;
if (!tuple_col)
throw Exception(get_message_prefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception(getMessagePrefix(i) + " must be constant array of tuples", ErrorCodes::ILLEGAL_COLUMN);
const auto & tuple_columns = tuple_col->getColumns();
const auto & column_x = tuple_columns[0];
@ -197,7 +316,7 @@ private:
auto size = column_x->size();
if (size == 0)
throw Exception(get_message_prefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception(getMessagePrefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN);
for (auto j : ext::range(0, size))
{
@ -207,6 +326,21 @@ private:
}
}
return polygon;
}
Polygon parsePolygon(Block & block, const ColumnNumbers & arguments, size_t i) const
{
Polygon polygon;
if (arguments.size() == 2)
{
polygon = parsePolygonFromSingleColumn(block, arguments, i);
}
else
{
polygon = parsePolygonFromMultipleColumns(block, arguments, i);
}
boost::geometry::correct(polygon);
#if !defined(__clang_analyzer__) /// It does not like boost.
@ -218,19 +352,14 @@ private:
throw Exception("Polygon is not valid: " + failure_message, ErrorCodes::BAD_ARGUMENTS);
}
#endif
auto call_impl = use_object_pool
? callPointInPolygonImplWithPool<Polygon, PointInPolygonImpl>
: callPointInPolygonImpl<Polygon, PointInPolygonImpl>;
return call_impl(x, y, polygon);
return polygon;
}
};
void registerFunctionPointInPolygon(FunctionFactory & factory)
{
factory.registerFunction<FunctionPointInPolygon<PointInPolygonWithGrid<Float64>, true>>();
factory.registerFunction<FunctionPointInPolygon<PointInPolygonWithGrid<Float64>, PointInPolygonTrivial<Float64>>>();
}
}
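In the single-column form accepted above, Array(Array(Tuple(x, y))) means an outer ring followed by holes, and after boost::geometry::correct the containment test is carried out by the pointInPolygon implementations registered here, which are built on boost::geometry types. A standalone boost::geometry sketch of the same semantics (a 10x10 square with a 2x2 hole), independent of the ClickHouse wrapper:

#include <boost/geometry.hpp>
#include <boost/geometry/geometries/point_xy.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include <iostream>

namespace bg = boost::geometry;

int main()
{
    using Point = bg::model::d2::point_xy<double>;
    using Polygon = bg::model::polygon<Point>;

    Polygon polygon;
    /// Outer ring: a 10x10 square; inner ring: a 2x2 hole around (5, 5).
    bg::read_wkt("POLYGON((0 0, 10 0, 10 10, 0 10, 0 0),"
                 "(4 4, 6 4, 6 6, 4 6, 4 4))", polygon);
    bg::correct(polygon);  /// same fix-up call the function makes before validation

    std::cout << bg::within(Point(2, 2), polygon) << '\n';   /// 1: inside the outer ring, outside the hole
    std::cout << bg::within(Point(5, 5), polygon) << '\n';   /// 0: inside the hole
    std::cout << bg::within(Point(15, 5), polygon) << '\n';  /// 0: outside the polygon
}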

View File

@ -471,7 +471,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else if (identifier && node.name == "joinGet" && arg == 0)
else if (identifier && (functionIsJoinGet(node.name) || functionIsDictGet(node.name)) && arg == 0)
{
auto table_id = IdentifierSemantic::extractDatabaseAndTable(*identifier);
table_id = data.context.resolveStorageID(table_id, Context::ResolveOrdinary);
@ -480,7 +480,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ColumnWithTypeAndName column(
ColumnConst::create(std::move(column_string), 1),
std::make_shared<DataTypeString>(),
data.getUniqueName("__joinGet"));
data.getUniqueName("__" + node.name));
data.addAction(ExpressionAction::addColumn(column));
argument_types.push_back(column.type);
argument_names.push_back(column.name);

View File

@ -1025,6 +1025,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
chain.clear();
};
if (storage)
{
query_analyzer.makeSetsForIndex(query.where());
query_analyzer.makeSetsForIndex(query.prewhere());
}
{
ExpressionActionsChain chain(context);
Names additional_required_columns_after_prewhere;

View File

@ -243,8 +243,6 @@ public:
const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }
/// Create Set-s that we make from IN section to use index on them.
void makeSetsForIndex(const ASTPtr & node);
const PreparedSets & getPreparedSets() const { return prepared_sets; }
/// Tables that will need to be sent to remote servers for distributed query processing.
@ -275,6 +273,9 @@ private:
*/
SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name);
/// Create Set-s that we make from IN section to use index on them.
void makeSetsForIndex(const ASTPtr & node);
JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element);
const ASTSelectQuery * getAggregatingQuery() const;

View File

@ -309,12 +309,29 @@ InterpreterSelectQuery::InterpreterSelectQuery(
ASTSelectQuery & query = getSelectQuery();
std::shared_ptr<TableJoin> table_join = joined_tables.makeTableJoin(query);
auto analyze = [&] (bool try_move_to_prewhere = true)
ASTPtr row_policy_filter;
if (storage)
row_policy_filter = context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER);
auto analyze = [&] (bool try_move_to_prewhere)
{
syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect(
query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage),
options, joined_tables.tablesWithColumns(), required_result_column_names, table_join);
if (try_move_to_prewhere && storage && !row_policy_filter && query.where() && !query.prewhere() && !query.final())
{
/// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get()))
{
SelectQueryInfo current_info;
current_info.query = query_ptr;
current_info.syntax_analyzer_result = syntax_analyzer_result;
MergeTreeWhereOptimizer{current_info, *context, *merge_tree, syntax_analyzer_result->requiredSourceColumns(), log};
}
}
/// Save scalar subqueries' results in the query context
if (!options.only_analyze && context->hasQueryContext())
for (const auto & it : syntax_analyzer_result->getScalars())
@ -365,7 +382,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
source_header = storage->getSampleBlockForColumns(required_columns);
/// Fix source_header for filter actions.
auto row_policy_filter = context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER);
if (row_policy_filter)
{
filter_info = std::make_shared<FilterInfo>();
@ -378,10 +394,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);
/// Calculate structure of the result.
result_header = getSampleBlockImpl(try_move_to_prewhere);
result_header = getSampleBlockImpl();
};
analyze();
analyze(settings.optimize_move_to_prewhere);
bool need_analyze_again = false;
if (analysis_result.prewhere_constant_filter_description.always_false || analysis_result.prewhere_constant_filter_description.always_true)
@ -481,40 +497,8 @@ QueryPipeline InterpreterSelectQuery::executeWithProcessors()
}
Block InterpreterSelectQuery::getSampleBlockImpl(bool try_move_to_prewhere)
Block InterpreterSelectQuery::getSampleBlockImpl()
{
auto & query = getSelectQuery();
const Settings & settings = context->getSettingsRef();
/// Do all AST changes here, because actions from analysis_result will be used later in readImpl.
if (storage)
{
query_analyzer->makeSetsForIndex(query.where());
query_analyzer->makeSetsForIndex(query.prewhere());
/// PREWHERE optimization.
/// Turn off, if the table filter (row-level security) is applied.
if (!context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER))
{
auto optimize_prewhere = [&](auto & merge_tree)
{
SelectQueryInfo current_info;
current_info.query = query_ptr;
current_info.syntax_analyzer_result = syntax_analyzer_result;
current_info.sets = query_analyzer->getPreparedSets();
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere && try_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
MergeTreeWhereOptimizer{current_info, *context, merge_tree,
syntax_analyzer_result->requiredSourceColumns(), log};
};
if (const auto * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
optimize_prewhere(*merge_tree_data);
}
}
if (storage && !options.only_analyze)
from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_ptr);

View File

@ -106,7 +106,7 @@ private:
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
Block getSampleBlockImpl(bool try_move_to_prewhere);
Block getSampleBlockImpl();
struct Pipeline
{

View File

@ -38,16 +38,18 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
if (functionIsInOrGlobalInOperator(func.name))
{
auto & ast = func.arguments->children.at(1);
if (auto opt_name = tryGetIdentifierName(ast))
if (!data.aliases.count(*opt_name))
setIdentifierSpecial(ast);
auto opt_name = tryGetIdentifierName(ast);
if (opt_name && !data.aliases.count(*opt_name))
setIdentifierSpecial(ast);
}
// first argument of joinGet can be a table identifier
if (func.name == "joinGet")
// First argument of joinGet can be a table name, perhaps with a database.
// First argument of dictGet can be a dictionary name, perhaps with a database.
if (functionIsJoinGet(func.name) || functionIsDictGet(func.name))
{
auto & ast = func.arguments->children.at(0);
if (auto opt_name = tryGetIdentifierName(ast))
auto opt_name = tryGetIdentifierName(ast);
if (opt_name && !data.aliases.count(*opt_name))
setIdentifierSpecial(ast);
}
}

View File

@ -767,8 +767,13 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
const auto & settings = context.getSettingsRef();
const NameSet & source_columns_set = result.source_columns_set;
result.analyzed_join = table_join;
if (!result.analyzed_join) /// ExpressionAnalyzer expects some not empty object here
if (table_join)
{
result.analyzed_join = table_join;
result.analyzed_join->resetCollected();
}
else /// TODO: remove. For now ExpressionAnalyzer expects a non-empty object here
result.analyzed_join = std::make_shared<TableJoin>();
if (remove_duplicates)

View File

@ -97,7 +97,7 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi
if (metric_log)
logs.emplace_back(metric_log.get());
bool lazy_load = config.getBool("system_tables_lazy_load", true);
bool lazy_load = config.getBool("system_tables_lazy_load", false);
try
{

View File

@ -29,6 +29,18 @@ TableJoin::TableJoin(const Settings & settings, VolumeJBODPtr tmp_volume_)
join_algorithm = JoinAlgorithm::PREFER_PARTIAL_MERGE;
}
void TableJoin::resetCollected()
{
key_names_left.clear();
key_names_right.clear();
key_asts_left.clear();
key_asts_right.clear();
columns_from_joined_table.clear();
columns_added_by_join.clear();
original_names.clear();
renames.clear();
}
void TableJoin::addUsingKey(const ASTPtr & ast)
{
key_names_left.push_back(ast->getColumnName());

View File

@ -112,6 +112,7 @@ public:
const String & temporaryFilesCodec() const { return temporary_files_codec; }
bool enablePartialMergeJoinOptimizations() const { return partial_merge_join_optimizations; }
void resetCollected();
void addUsingKey(const ASTPtr & ast);
void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);

View File

@ -1,5 +1,7 @@
#pragma once
#include <Common/StringUtils/StringUtils.h>
namespace DB
{
@ -18,4 +20,14 @@ inline bool functionIsLikeOperator(const std::string & name)
return name == "like" || name == "notLike";
}
inline bool functionIsJoinGet(const std::string & name)
{
return name == "joinGet" || startsWith(name, "dictGet");
}
inline bool functionIsDictGet(const std::string & name)
{
return startsWith(name, "dictGet") || (name == "dictHas") || (name == "dictIsIn");
}
}

View File

@ -319,7 +319,7 @@ protected:
/// checksums.txt and columns.txt. 0 - if not counted;
UInt64 bytes_on_disk{0};
/// Columns description. Cannot be changed, after part initialiation.
/// Columns description. Cannot be changed, after part initialization.
NamesAndTypesList columns;
const Type part_type;
@ -352,7 +352,7 @@ private:
/// For the older format version calculates rows count from the size of a column with a fixed size.
void loadRowsCount();
/// Loads ttl infos in json format from file ttl.txt. If file doesn`t exists assigns ttl infos with all zeros
/// Loads ttl infos in json format from file ttl.txt. If file doesn't exists assigns ttl infos with all zeros
void loadTTLInfos();
void loadPartitionAndMinMaxIndex();

View File

@ -136,7 +136,7 @@ protected:
size_t next_mark = 0;
size_t next_index_offset = 0;
/// Number of marsk in data from which skip indices have to start
/// Number of marks in data from which skip indices have to start
/// aggregation. I.e. it's data mark number, not skip indices mark.
size_t skip_index_data_mark = 0;

View File

@ -70,7 +70,7 @@ namespace ErrorCodes
/// [Column].mrk - marks, pointing to seek positions allowing to skip n * k rows.
///
/// File structure of tables with custom partitioning (format_version >= 1):
/// Part directory - / partiiton-id _ min-id _ max-id _ level /
/// Part directory - / partition-id _ min-id _ max-id _ level /
/// Inside the part directory:
/// The same files as for month-partitioned tables, plus
/// count.txt - contains total number of rows in this part.

View File

@ -160,7 +160,7 @@ private:
NamesAndTypesList storage_columns,
const MutationCommands & commands_for_removes);
/// Get skip indcies, that should exists in the resulting data part.
/// Get skip indices, that should exists in the resulting data part.
static MergeTreeIndices getIndicesForNewDataPart(
const MergeTreeIndices & all_indices,
const MutationCommands & commands_for_removes);

View File

@ -6,13 +6,13 @@ namespace DB
{
/** In compact format all columns are stored in one file (`data.bin`).
* Data is splitted in granules and columns are serialized sequentially in one granule.
* Data is split in granules and columns are serialized sequentially in one granule.
* Granules are written one by one in data file.
* Marks are also stored in single file (`data.mrk3`).
* In compact format one mark is an array of marks for every column and a number of rows in granule.
* Format of other data part files is not changed.
* It's considered to store only small parts in compact format (up to 10M).
* NOTE: Compact parts aren't supported for tables with non-adaptive granularty.
* NOTE: Compact parts aren't supported for tables with non-adaptive granularity.
* NOTE: In compact part compressed and uncompressed size of single column is unknown.
*/
class MergeTreeDataPartCompact : public IMergeTreeDataPart

View File

@ -28,7 +28,7 @@ struct MergeTreeSettings : public SettingsCollection<MergeTreeSettings>
#define LIST_OF_MERGE_TREE_SETTINGS(M) \
M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \
\
/** Data storing format settigns. */ \
/** Data storing format settings. */ \
M(SettingUInt64, min_bytes_for_wide_part, 0, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
M(SettingUInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \
\

View File

@ -331,18 +331,13 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper(
void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeeper, LogEntryPtr & entry)
{
auto code = zookeeper->tryRemove(replica_path + "/queue/" + entry->znode_name);
if (code)
LOG_ERROR(log, "Couldn't remove " << replica_path << "/queue/" << entry->znode_name << ": "
<< zkutil::ZooKeeper::error2string(code) << ". This shouldn't happen often.");
std::optional<time_t> min_unprocessed_insert_time_changed;
std::optional<time_t> max_processed_insert_time_changed;
bool found = false;
size_t queue_size = 0;
/// First remove from memory then from ZooKeeper
{
std::unique_lock lock(state_mutex);
@ -372,6 +367,11 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep
notifySubscribers(queue_size);
auto code = zookeeper->tryRemove(replica_path + "/queue/" + entry->znode_name);
if (code)
LOG_ERROR(log, "Couldn't remove " << replica_path << "/queue/" << entry->znode_name << ": "
<< zkutil::ZooKeeper::error2string(code) << ". This shouldn't happen often.");
updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed);
}
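The hunk above reorders removeProcessedEntry so the in-memory queue is updated under state_mutex and subscribers are notified before the znode is removed. A generic sketch of that ordering, with the ZooKeeper call replaced by a stub; Queue, removeFromRemote and the entry names are all hypothetical:

#include <iostream>
#include <list>
#include <mutex>
#include <string>

/// Update local state under its mutex (and tell whoever watches the queue size)
/// before issuing the external removal, so local state never lags behind it.
struct Queue
{
    std::mutex state_mutex;
    std::list<std::string> entries;

    void removeProcessedEntry(const std::string & name)
    {
        size_t queue_size = 0;
        {
            std::lock_guard lock(state_mutex);
            entries.remove(name);
            queue_size = entries.size();
        }
        notifySubscribers(queue_size);

        /// Only now touch the external store (stand-in for zookeeper->tryRemove).
        removeFromRemote(name);
    }

    void notifySubscribers(size_t size) { std::cout << "queue size: " << size << '\n'; }
    void removeFromRemote(const std::string & name) { std::cout << "removed remotely: " << name << '\n'; }
};

int main()
{
    Queue q;
    q.entries = {"entry-1", "entry-2"};
    q.removeProcessedEntry("entry-1");
}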

View File

@ -3,6 +3,7 @@
#include <Common/escapeForFileName.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
@ -17,41 +18,48 @@ namespace DB
StorageSystemParts::StorageSystemParts(const std::string & name_)
: StorageSystemPartsBase(name_,
{
{"partition", std::make_shared<DataTypeString>()},
{"name", std::make_shared<DataTypeString>()},
{"part_type", std::make_shared<DataTypeString>()},
{"active", std::make_shared<DataTypeUInt8>()},
{"marks", std::make_shared<DataTypeUInt64>()},
{"rows", std::make_shared<DataTypeUInt64>()},
{"bytes_on_disk", std::make_shared<DataTypeUInt64>()},
{"data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
{"data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
{"marks_bytes", std::make_shared<DataTypeUInt64>()},
{"modification_time", std::make_shared<DataTypeDateTime>()},
{"remove_time", std::make_shared<DataTypeDateTime>()},
{"refcount", std::make_shared<DataTypeUInt32>()},
{"min_date", std::make_shared<DataTypeDate>()},
{"max_date", std::make_shared<DataTypeDate>()},
{"min_time", std::make_shared<DataTypeDateTime>()},
{"max_time", std::make_shared<DataTypeDateTime>()},
{"partition_id", std::make_shared<DataTypeString>()},
{"min_block_number", std::make_shared<DataTypeInt64>()},
{"max_block_number", std::make_shared<DataTypeInt64>()},
{"level", std::make_shared<DataTypeUInt32>()},
{"data_version", std::make_shared<DataTypeUInt64>()},
{"primary_key_bytes_in_memory", std::make_shared<DataTypeUInt64>()},
{"primary_key_bytes_in_memory_allocated", std::make_shared<DataTypeUInt64>()},
{"is_frozen", std::make_shared<DataTypeUInt8>()},
{"partition", std::make_shared<DataTypeString>()},
{"name", std::make_shared<DataTypeString>()},
{"part_type", std::make_shared<DataTypeString>()},
{"active", std::make_shared<DataTypeUInt8>()},
{"marks", std::make_shared<DataTypeUInt64>()},
{"rows", std::make_shared<DataTypeUInt64>()},
{"bytes_on_disk", std::make_shared<DataTypeUInt64>()},
{"data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
{"data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
{"marks_bytes", std::make_shared<DataTypeUInt64>()},
{"modification_time", std::make_shared<DataTypeDateTime>()},
{"remove_time", std::make_shared<DataTypeDateTime>()},
{"refcount", std::make_shared<DataTypeUInt32>()},
{"min_date", std::make_shared<DataTypeDate>()},
{"max_date", std::make_shared<DataTypeDate>()},
{"min_time", std::make_shared<DataTypeDateTime>()},
{"max_time", std::make_shared<DataTypeDateTime>()},
{"partition_id", std::make_shared<DataTypeString>()},
{"min_block_number", std::make_shared<DataTypeInt64>()},
{"max_block_number", std::make_shared<DataTypeInt64>()},
{"level", std::make_shared<DataTypeUInt32>()},
{"data_version", std::make_shared<DataTypeUInt64>()},
{"primary_key_bytes_in_memory", std::make_shared<DataTypeUInt64>()},
{"primary_key_bytes_in_memory_allocated", std::make_shared<DataTypeUInt64>()},
{"is_frozen", std::make_shared<DataTypeUInt8>()},
{"database", std::make_shared<DataTypeString>()},
{"table", std::make_shared<DataTypeString>()},
{"engine", std::make_shared<DataTypeString>()},
{"disk_name", std::make_shared<DataTypeString>()},
{"path", std::make_shared<DataTypeString>()},
{"database", std::make_shared<DataTypeString>()},
{"table", std::make_shared<DataTypeString>()},
{"engine", std::make_shared<DataTypeString>()},
{"disk_name", std::make_shared<DataTypeString>()},
{"path", std::make_shared<DataTypeString>()},
{"hash_of_all_files", std::make_shared<DataTypeString>()},
{"hash_of_uncompressed_files", std::make_shared<DataTypeString>()},
{"uncompressed_hash_of_compressed_files", std::make_shared<DataTypeString>()}
{"hash_of_all_files", std::make_shared<DataTypeString>()},
{"hash_of_uncompressed_files", std::make_shared<DataTypeString>()},
{"uncompressed_hash_of_compressed_files", std::make_shared<DataTypeString>()},
{"delete_ttl_info_min", std::make_shared<DataTypeDateTime>()},
{"delete_ttl_info_max", std::make_shared<DataTypeDateTime>()},
{"move_ttl_info.expression", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"move_ttl_info.min", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
{"move_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
}
)
{
@ -128,6 +136,31 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto
checksum = helper.uncompressed_hash_of_compressed_files;
columns_[i++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
/// delete_ttl_info
{
columns_[i++]->insert(static_cast<UInt32>(part->ttl_infos.table_ttl.min));
columns_[i++]->insert(static_cast<UInt32>(part->ttl_infos.table_ttl.max));
}
/// move_ttl_info
{
Array expression_array;
Array min_array;
Array max_array;
expression_array.reserve(part->ttl_infos.moves_ttl.size());
min_array.reserve(part->ttl_infos.moves_ttl.size());
max_array.reserve(part->ttl_infos.moves_ttl.size());
for (const auto & [expression, move_ttl_info] : part->ttl_infos.moves_ttl)
{
expression_array.emplace_back(expression);
min_array.push_back(static_cast<UInt32>(move_ttl_info.min));
max_array.push_back(static_cast<UInt32>(move_ttl_info.max));
}
columns_[i++]->insert(expression_array);
columns_[i++]->insert(min_array);
columns_[i++]->insert(max_array);
}
}
}

View File

@ -234,6 +234,14 @@ def run_tests_array(all_tests_with_params):
clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite))
if not args.no_system_log_cleanup:
clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
clickhouse_proc.communicate("SYSTEM FLUSH LOGS")
for table in ['query_log', 'query_thread_log', 'trace_log', 'metric_log']:
clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
clickhouse_proc.communicate("TRUNCATE TABLE IF EXISTS system.{}".format(table))
reference_file = os.path.join(suite_dir, name) + '.reference'
stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'
@ -564,6 +572,7 @@ if __name__ == '__main__':
parser.add_argument('--stop', action='store_true', default=None, dest='stop', help='Stop on network errors')
parser.add_argument('--order', default='desc', choices=['asc', 'desc', 'random'], help='Run order')
parser.add_argument('--testname', action='store_true', default=None, dest='testname', help='Make query with test name before test run')
parser.add_argument('--no-system-log-cleanup', action='store_true', default=None, help='Do not cleanup system.*_log tables')
parser.add_argument('--hung-check', action='store_true', default=False)
parser.add_argument('--force-color', action='store_true', default=False)
parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)')

View File

@ -1,6 +1,6 @@
<yandex>
<part_log>
<database>database_name</database>
<table>table_name</table>
<database>database_name</database> <!-- ignored -->
<table>own_part_log</table>
</part_log>
</yandex>

View File

@ -21,22 +21,21 @@ def test_config_without_part_log(start_cluster):
node1.query("CREATE TABLE test_table(word String, value UInt64) ENGINE=MergeTree() ORDER BY value")
assert "Table system.part_log doesn't exist" in node1.query_and_get_error("SELECT * FROM system.part_log")
node1.query("INSERT INTO test_table VALUES ('name', 1)")
time.sleep(10)
node1.query("SYSTEM FLUSH LOGS")
assert "Table system.part_log doesn't exist" in node1.query_and_get_error("SELECT * FROM system.part_log")
def test_config_with_standard_part_log(start_cluster):
assert "Table system.part_log doesn't exist" in node2.query_and_get_error("SELECT * FROM system.part_log")
assert node2.query("SELECT * FROM system.part_log") == ''
node2.query("CREATE TABLE test_table(word String, value UInt64) ENGINE=MergeTree() Order by value")
assert "Table system.part_log doesn't exist" in node2.query_and_get_error("SELECT * FROM system.part_log")
assert node2.query("SELECT * FROM system.part_log") == ''
node2.query("INSERT INTO test_table VALUES ('name', 1)")
time.sleep(10)
assert node2.query("SELECT * FROM system.part_log") != ""
node2.query("SYSTEM FLUSH LOGS")
assert int(node2.query("SELECT count() FROM system.part_log")) == 1
def test_config_with_non_standard_part_log(start_cluster):
assert "Table system.table_name doesn't exist" in node3.query_and_get_error("SELECT * FROM system.table_name")
assert node3.query("SELECT * FROM system.own_part_log") == ''
node3.query("CREATE TABLE test_table(word String, value UInt64) ENGINE=MergeTree() Order by value")
assert "Table system.table_name doesn't exist" in node3.query_and_get_error("SELECT * FROM system.table_name")
assert node3.query("SELECT * FROM system.own_part_log") == ''
node3.query("INSERT INTO test_table VALUES ('name', 1)")
time.sleep(10)
assert node3.query("SELECT * FROM system.table_name") != ""
node3.query("SYSTEM FLUSH LOGS")
assert int(node3.query("SELECT count() FROM system.own_part_log")) == 1

View File

@ -18,12 +18,12 @@
<secret_access_key>minio123</secret_access_key>
<proxy>
<!--
At each interaction with S3 resolver sends empty GET / request to specified endpoint to obtain proxy host.
At each interaction with S3 resolver sends empty GET request to specified endpoint URL to obtain proxy host.
Proxy host is returned as string in response body.
Then S3 client uses proxy URL formed as proxy_scheme://proxy_host:proxy_port to make request.
-->
<resolver>
<endpoint>http://resolver:8080</endpoint>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>http</proxy_scheme>
<proxy_port>8888</proxy_port>
</resolver>

View File

@ -2,7 +2,7 @@ import bottle
import random
@bottle.route('/')
@bottle.route('/hostname')
def index():
if random.randrange(2) == 0:
return 'proxy1'

View File

@ -30,7 +30,7 @@ def test_sophisticated_default(started_cluster):
def test_partially_dropped_tables(started_cluster):
instance = started_cluster.instances['dummy']
assert instance.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse -name *.sql* | sort'], privileged=True, user='root') \
assert instance.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/*/default -name *.sql* | sort'], privileged=True, user='root') \
== "/var/lib/clickhouse/metadata/default/should_be_restored.sql\n" \
"/var/lib/clickhouse/metadata/default/sophisticated_default.sql\n"
assert instance.query("SELECT n FROM should_be_restored") == "1\n2\n3\n"

View File

@ -0,0 +1,4 @@
<?xml version="1.0"?>
<yandex>
<system_tables_lazy_load>true</system_tables_lazy_load>
</yandex>

View File

@ -0,0 +1,32 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node_default = cluster.add_instance('node_default')
# main_configs is mandatory, since system_tables_lazy_load will be read earlier than the parsing of config_lazy.xml
node_lazy = cluster.add_instance('node_lazy', config_dir='configs', main_configs=['configs/config_lazy.xml'])
system_logs = [
# disabled by default
# ('system.part_log'),
# ('system.text_log'),
# enabled by default
('system.query_log'),
('system.query_thread_log'),
('system.trace_log'),
('system.metric_log'),
]
@pytest.fixture(scope='module')
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
@pytest.mark.parametrize('table', system_logs)
def test_system_table(start_cluster, table):
node_default.query('SELECT * FROM {}'.format(table))
assert "Table {} doesn't exist".format(table) in node_lazy.query_and_get_error('SELECT * FROM {}'.format(table))

View File

@ -1,4 +0,0 @@
<?xml version="1.0"?>
<yandex>
<system_tables_lazy_load>false</system_tables_lazy_load>
</yandex>

View File

@ -1,24 +0,0 @@
import time
import pytest
import os
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', config_dir="configs")
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_system_tables_non_lazy_load(start_cluster):
assert node1.query_and_get_error("SELECT * FROM system.part_log") == ""
assert node1.query_and_get_error("SELECT * FROM system.query_log") == ""
assert node1.query_and_get_error("SELECT * FROM system.query_thread_log") == ""
assert node1.query_and_get_error("SELECT * FROM system.text_log") == ""
assert node1.query_and_get_error("SELECT * FROM system.trace_log") == ""
assert node1.query_and_get_error("SELECT * FROM system.metric_log") == ""

View File

@ -0,0 +1,6 @@
<test>
<create_query>CREATE TABLE point_in_polygon(`polygon` Array(Array(Tuple(Float64, Float64)))) ENGINE = Log()</create_query>
<create_query>insert into point_in_polygon SELECT arrayJoin(arrayMap(y -> [arrayMap(x -> (cos(x / 90. * pi()) * y, sin(x / 90. * pi()) * y), range(180))], arraySlice(range(35000), 2, 35000)))</create_query>
<query>SELECT pointInPolygon((100, 100), `polygon`) from point_in_polygon</query>
<drop_query>DROP TABLE IF EXISTS point_in_polygon</drop_query>
</test>

View File

@ -3,10 +3,10 @@
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(10))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(100))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(1000))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(10000))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomStringUTF8(rand() % 10))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomStringUTF8(rand() % 100))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(rand() % 1000))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(100))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(1000))</query>
<query>SELECT count() FROM zeros(1000) WHERE NOT ignore(randomStringUTF8(10000))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(rand() % 10))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(rand() % 100))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(rand() % 1000))</query>
</test>

19
tests/performance/sum.xml Normal file
View File

@ -0,0 +1,19 @@
<test>
<query>SELECT sum(number) FROM numbers(100000000)</query>
<query>SELECT sum(toUInt32(number)) FROM numbers(100000000)</query>
<query>SELECT sum(toUInt16(number)) FROM numbers(100000000)</query>
<query>SELECT sum(toUInt8(number)) FROM numbers(100000000)</query>
<query>SELECT sum(toFloat32(number)) FROM numbers(100000000)</query>
<query>SELECT sum(toFloat64(number)) FROM numbers(100000000)</query>
<query>SELECT sumKahan(toFloat32(number)) FROM numbers(100000000)</query>
<query>SELECT sumKahan(toFloat64(number)) FROM numbers(100000000)</query>
<query>SELECT sum(toNullable(number)) FROM numbers(100000000)</query>
<query>SELECT sum(toNullable(toUInt32(number))) FROM numbers(100000000)</query>
<query>SELECT sum(toNullable(toUInt16(number))) FROM numbers(100000000)</query>
<query>SELECT sum(toNullable(toUInt8(number))) FROM numbers(100000000)</query>
<query>SELECT sum(toNullable(toFloat32(number))) FROM numbers(100000000)</query>
<query>SELECT sum(toNullable(toFloat64(number))) FROM numbers(100000000)</query>
<query>SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000)</query>
<query>SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000)</query>
</test>
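
sumKahan is the compensated (Kahan) summation counterpart of sum: it carries a running error term to reduce floating-point rounding loss, at the cost of extra work per value, which is why the benchmark above times it separately from plain sum over Float32/Float64 and their Nullable wrappers. A short sketch; the expected results come from the functional test added later in this commit:

SELECT sum(number) FROM numbers(10);                        -- 45
SELECT sumKahan(toFloat64(number)) FROM numbers(10);        -- 45
SELECT sum(toNullable(number)) FROM numbers(10);            -- 45, Nullable input gives the same total
SELECT sum(x) FROM (SELECT 1 AS x UNION ALL SELECT NULL);   -- 1, NULL values are ignored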

View File

@ -0,0 +1,68 @@
Const point; No holes
0
0
0
0
0
1
1
1
1
1
1
0
1
0
1
0
Non-const point; No holes
0
0
0
0
0
1
1
1
1
1
1
0
1
0
1
0
Const point; With holes
0
0
0
0
0
1
1
0
0
1
1
0
1
0
1
0
Non-const point; With holes
0
0
0
0
0
1
1
0
0
1
1
0
1
0
1
0

View File

@ -0,0 +1,86 @@
DROP TABLE IF EXISTS polygons;
SELECT 'Const point; No holes';
create table polygons ( id Int32, poly Array(Tuple(Int32, Int32))) engine = Log();
INSERT INTO polygons VALUES (1, [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (2, [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
SELECT pointInPolygon((-10, 0), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((0, -10), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((-5, -5), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((0, 0), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((5, 5), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((10, 10), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((10, 5), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((5, 10), poly) FROM polygons ORDER BY id;
DROP TABLE polygons;
SELECT 'Non-const point; No holes';
create table polygons ( id Int32, pt Tuple(Int32, Int32), poly Array(Tuple(Int32, Int32))) engine = Log();
INSERT INTO polygons VALUES (1, (-10, 0), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (2, (-10, 0), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (3, (0, -10), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (4, (0, -10), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (5, (-5, -5), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (6, (-5, -5), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (7, (0, 0), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (8, (0, 0), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (9, (5, 5), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (10, (5, 5), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (11, (10, 10), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (12, (10, 10), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (13, (10, 5), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (14, (10, 5), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
INSERT INTO polygons VALUES (15, (5, 10), [(0, 0), (10, 0), (10, 10), (0, 10)]);
INSERT INTO polygons VALUES (16, (5, 10), [(-5, -5), (5, -5), (5, 5), (-5, 5)]);
SELECT pointInPolygon(pt, poly) FROM polygons ORDER BY id;
DROP TABLE polygons;
SELECT 'Const point; With holes';
create table polygons ( id Int32, poly Array(Array(Tuple(Int32, Int32)))) engine = Log();
INSERT INTO polygons VALUES (1, [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (2, [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
SELECT pointInPolygon((-10, 0), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((0, -10), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((-5, -5), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((0, 0), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((5, 5), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((10, 10), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((10, 5), poly) FROM polygons ORDER BY id;
SELECT pointInPolygon((5, 10), poly) FROM polygons ORDER BY id;
DROP TABLE polygons;
SELECT 'Non-const point; With holes';
create table polygons ( id Int32, pt Tuple(Int32, Int32), poly Array(Array(Tuple(Int32, Int32)))) engine = Log();
INSERT INTO polygons VALUES (1, (-10, 0), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (2, (-10, 0), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (3, (0, -10), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (4, (0, -10), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (5, (-5, -5), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (6, (-5, -5), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (7, (0, 0), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (8, (0, 0), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (9, (5, 5), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (10, (5, 5), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (11, (10, 10), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (12, (10, 10), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (13, (10, 5), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (14, (10, 5), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
INSERT INTO polygons VALUES (15, (5, 10), [[(0, 0), (10, 0), (10, 10), (0, 10)], [(4, 4), (6, 4), (6, 6), (4, 6)]]);
INSERT INTO polygons VALUES (16, (5, 10), [[(-5, -5), (5, -5), (5, 5), (-5, 5)], [(-1, -1), (1, -1), (1, 1), (-1, 1)]]);
SELECT pointInPolygon(pt, poly) FROM polygons ORDER BY id;
DROP TABLE polygons;

View File

@ -47,7 +47,7 @@ SELECT
threads_realtime >= threads_time_user_system_io,
any(length(thread_ids)) >= 1
FROM
(SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE event_date >= today()-1 AND type=2 ORDER BY event_time DESC LIMIT 1)
(SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE type='QueryFinish' ORDER BY event_time DESC LIMIT 1)
ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV"
# Check per-thread and per-query ProfileEvents consistency
@ -58,7 +58,7 @@ SELECT PN, PVq, PVt FROM
SELECT PN, sum(PV) AS PVt
FROM system.query_thread_log
ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV
WHERE event_date >= today()-1 AND query_id='$query_id'
WHERE query_id='$query_id'
GROUP BY PN
) js1
ANY INNER JOIN
@ -66,7 +66,7 @@ ANY INNER JOIN
SELECT PN, PV AS PVq
FROM system.query_log
ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV
WHERE event_date >= today()-1 AND query_id='$query_id'
WHERE query_id='$query_id'
) js2
USING PN
WHERE

View File

@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query_id="test-query-uncompresse
sleep 1
$CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS"
$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1"
$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 'QueryFinish') ORDER BY event_time DESC LIMIT 1"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table"
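
The changes above replace the numeric filter type = 2 with the symbolic type = 'QueryFinish'. The type column of system.query_log is an Enum ('QueryStart' = 1, 'QueryFinish' = 2, ...), so both spellings match the same rows; the name is simply self-documenting. A minimal illustration, reusing the query_id set by the test above (query_duration_ms is just one of the log's columns):

SELECT type, query_duration_ms
FROM system.query_log
WHERE query_id = 'test-query-uncompressed-cache' AND type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 1;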

View File

@ -95,7 +95,7 @@ echo 7
# and finally querylog
$CLICKHOUSE_CLIENT \
--server_logs_file=/dev/null \
--query="select * from system.query_log where event_time>now() - 10 and query like '%TOPSECRET%';"
--query="select * from system.query_log where query like '%TOPSECRET%';"
rm -f $tmp_file >/dev/null 2>&1
@ -117,8 +117,8 @@ sleep 0.1;
echo 9
$CLICKHOUSE_CLIENT \
--server_logs_file=/dev/null \
--query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_time>now() - 60 and message like '%find_me%';
select * from system.text_log where event_time>now() - 60 and message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery
--query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE message like '%find_me%';
select * from system.text_log where message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery
echo 'finish'
rm -f $tmp_file >/dev/null 2>&1

View File

@ -5,7 +5,7 @@ SET log_queries = 1;
SELECT sleep(0.5), ignore('test real time query profiler');
SET log_queries = 0;
SYSTEM FLUSH LOGS;
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test real time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%FunctionSleep%';
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test real time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%FunctionSleep%';
SET query_profiler_real_time_period_ns = 0;
SET query_profiler_cpu_time_period_ns = 1000000;
@ -13,4 +13,4 @@ SET log_queries = 1;
SELECT count(), ignore('test cpu time query profiler') FROM numbers(1000000000);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test cpu time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%Source%';
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test cpu time query profiler%' AND query NOT LIKE '%system%' ORDER BY event_time DESC LIMIT 1) AND symbol LIKE '%Source%';
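
The profiler checks above depend on the sampling query profiler writing stack traces into system.trace_log and on the introspection functions resolving those addresses back to symbols. A minimal sketch of the same inspection pattern outside the test harness (assumes allow_introspection_functions is enabled, as in the memory-profiler test below; demangle, arrayMap and arrayStringConcat are standard helpers used here only for readability, and the 'CPU' trace type corresponds to query_profiler_cpu_time_period_ns):

SET allow_introspection_functions = 1;
SELECT count(),
       arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
FROM system.trace_log
WHERE trace_type = 'CPU'
GROUP BY trace
ORDER BY count() DESC
LIMIT 1;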

View File

@ -10,7 +10,7 @@ do
${CLICKHOUSE_CLIENT} --query="SYSTEM FLUSH LOGS"
sleep 0.1;
if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday()") == 1 ]]; then echo 1; exit; fi;
if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0") == 1 ]]; then echo 1; exit; fi;
done;

View File

@ -17,3 +17,8 @@ database_for_dict dict1 ComplexKeyCache
database_for_dict dict2 Hashed
6
6
6
6
6
database_for_dict.dict3 6
6

View File

@ -105,6 +105,19 @@ LAYOUT(HASHED());
SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12));
-- dictGet with table name
USE database_for_dict;
SELECT dictGetString(dict3, 'some_column', toUInt64(12));
SELECT dictGetString(database_for_dict.dict3, 'some_column', toUInt64(12));
SELECT dictGetString(default.dict3, 'some_column', toUInt64(12)); -- {serverError 36}
SELECT dictGet(dict3, 'some_column', toUInt64(12));
SELECT dictGet(database_for_dict.dict3, 'some_column', toUInt64(12));
SELECT dictGet(default.dict3, 'some_column', toUInt64(12)); -- {serverError 36}
USE default;
-- alias should be handled correctly
SELECT 'database_for_dict.dict3' as n, dictGet(n, 'some_column', toUInt64(12));
DROP TABLE database_for_dict.table_for_dict;
SYSTEM RELOAD DICTIONARIES; -- {serverError 60}

View File

@ -3,5 +3,5 @@ SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; -- { serve
CREATE TABLE test_table_for_01070_exception_code_in_query_log_table (value UInt64) ENGINE=Memory();
SELECT * FROM test_table_for_01070_exception_code_in_query_log_table;
SYSTEM FLUSH LOGS;
SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' AND event_date >= yesterday() AND event_time > now() - INTERVAL 5 MINUTE ORDER BY exception_code;
SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' ORDER BY exception_code;
DROP TABLE IF EXISTS test_table_for_01070_exception_code_in_query_log_table;

View File

@ -8,13 +8,13 @@ WITH
(
SELECT query_id
FROM system.query_log
WHERE (query = 'SELECT 1') AND (event_date >= (today() - 1))
WHERE (query = 'SELECT 1')
ORDER BY event_time DESC
LIMIT 1
) AS id
SELECT uniqExact(thread_id)
FROM system.query_thread_log
WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id);
WHERE (query_id = id) AND (thread_id != master_thread_id);
select sum(number) from numbers(1000000);
SYSTEM FLUSH LOGS;
@ -23,13 +23,13 @@ WITH
(
SELECT query_id
FROM system.query_log
WHERE (query = 'SELECT sum(number) FROM numbers(1000000)') AND (event_date >= (today() - 1))
WHERE (query = 'SELECT sum(number) FROM numbers(1000000)')
ORDER BY event_time DESC
LIMIT 1
) AS id
SELECT uniqExact(thread_id)
FROM system.query_thread_log
WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id);
WHERE (query_id = id) AND (thread_id != master_thread_id);
select sum(number) from numbers_mt(1000000);
SYSTEM FLUSH LOGS;
@ -38,10 +38,10 @@ WITH
(
SELECT query_id
FROM system.query_log
WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)') AND (event_date >= (today() - 1))
WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)')
ORDER BY event_time DESC
LIMIT 1
) AS id
SELECT uniqExact(thread_id) > 2
FROM system.query_thread_log
WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id);
WHERE (query_id = id) AND (thread_id != master_thread_id);

View File

@ -3,4 +3,4 @@ SET allow_introspection_functions = 1;
SET memory_profiler_step = 1000000;
SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000);
SYSTEM FLUSH LOGS;
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);

View File

@ -3,4 +3,4 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query_id = 'test_quota_key'"
$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE query_id = 'test_quota_key'"

View File

@ -2,14 +2,14 @@ set log_queries=1;
select '01231_log_queries_min_type/QUERY_START';
system flush logs;
select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute;
select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%';
set log_queries_min_type='EXCEPTION_BEFORE_START';
select '01231_log_queries_min_type/EXCEPTION_BEFORE_START';
system flush logs;
select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute;
select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%';
set log_queries_min_type='EXCEPTION_WHILE_PROCESSING';
select '01231_log_queries_min_type/', max(number) from system.numbers limit 1e6 settings max_rows_to_read='100K'; -- { serverError 158; }
system flush logs;
select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute;
select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%';

View File

@ -0,0 +1,92 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
# just in case
set -o pipefail
function execute()
{
${CLICKHOUSE_CLIENT} -n "$@"
}
#
# TEST SETTINGS
#
TEST_01278_PARTS=9
TEST_01278_MEMORY=$((100<<20))
function cleanup()
{
for i in $(seq 1 $TEST_01278_PARTS); do
echo "drop table if exists part_01278_$i;"
echo "drop table if exists mv_01278_$i;"
done | execute
echo 'drop table if exists data_01278;' | execute
echo 'drop table if exists out_01278;' | execute
echo 'drop table if exists null_01278;' | execute
}
cleanup
trap cleanup EXIT
#
# CREATE
#
{
cat <<EOL
create table data_01278 (
key UInt64,
// create a bunch of fields to increase memory usage for the query
s1 Nullable(String),
s2 Nullable(String),
s3 Nullable(String),
s4 Nullable(String),
s5 Nullable(String),
s6 Nullable(String),
s7 Nullable(String),
s8 Nullable(String)
) Engine=Null()
EOL
} | execute
echo "create table null_01278 as data_01278 Engine=Null();" | execute
for i in $(seq 1 $TEST_01278_PARTS); do
echo "create table part_01278_$i as data_01278 Engine=Buffer(currentDatabase(), null_01278, 1, 86400, 86400, 1e5, 1e6, 10e6, 100e6);"
echo "create materialized view mv_01278_$i to part_01278_$i as select * from data_01278 where key%$TEST_01278_PARTS+1 == $i;"
done | execute
echo "create table out_01278 as data_01278 Engine=Merge(currentDatabase(), 'part_01278_');" | execute
#
# INSERT
#
function execute_insert()
{
{
cat <<EOL
insert into data_01278 select
number,
reinterpretAsString(number), // s1
reinterpretAsString(number), // s2
reinterpretAsString(number), // s3
reinterpretAsString(number), // s4
reinterpretAsString(number), // s5
reinterpretAsString(number), // s6
reinterpretAsString(number), // s7
reinterpretAsString(number) // s8
from numbers(100000); -- { serverError 241; }
EOL
} | {
execute --max_memory_usage=$TEST_01278_MEMORY "$@"
}
echo 'select count() from out_01278' | execute
}
# fails
execute_insert --testmode
execute_insert --testmode --min_insert_block_size_rows=1 --min_insert_block_size_rows_for_materialized_views=$((1<<20))
# passes
execute_insert --min_insert_block_size_rows=1
execute_insert --min_insert_block_size_rows_for_materialized_views=1
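
The test above exercises min_insert_block_size_rows_for_materialized_views: when one INSERT fans out into many materialized views (here nine views writing into Buffer tables that drain into a Null table), this setting controls how much data is squashed into a single block for the pushes into the views, independently of min_insert_block_size_rows, so the peak memory of such an INSERT can be kept bounded. A minimal sketch of the knob, reusing the tables from the test; the memory limit mirrors TEST_01278_MEMORY:

SET max_memory_usage = 104857600;                            -- 100 MiB, as in the test
SET min_insert_block_size_rows = 1;
SET min_insert_block_size_rows_for_materialized_views = 1;   -- small blocks for the view pushes as well
INSERT INTO data_01278
SELECT number,
       reinterpretAsString(number), reinterpretAsString(number), reinterpretAsString(number),
       reinterpretAsString(number), reinterpretAsString(number), reinterpretAsString(number),
       reinterpretAsString(number), reinterpretAsString(number)
FROM numbers(100000);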

View File

@ -0,0 +1,8 @@
1
2
3
4
5
6
7
8

View File

@ -0,0 +1,21 @@
drop table if exists t;
create table t (x UInt8, id UInt8) ENGINE = MergeTree() order by (id);
insert into t values (1, 1);
set enable_optimize_predicate_expression = 0;
select 1 from t as l join t as r on l.id = r.id prewhere l.x;
select 2 from t as l join t as r on l.id = r.id where r.x;
select 3 from t as l join t as r on l.id = r.id prewhere l.x where r.x;
select 4 from t as l join t as r using id prewhere l.x where r.x;
select 5 from t as l join t as r on l.id = r.id where l.x and r.x;
select 6 from t as l join t as r using id where l.x and r.x;
set optimize_move_to_prewhere = 0;
select 7 from t as l join t as r on l.id = r.id where l.x and r.x;
select 8 from t as l join t as r using id where l.x and r.x;
drop table t;

View File

@ -0,0 +1,6 @@
45
45
45
1
45
\N

View File

@ -0,0 +1,6 @@
SELECT sumKahan(toFloat64(number)) FROM numbers(10);
SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(10);
SELECT sum(toNullable(number)) FROM numbers(10);
SELECT sum(x) FROM (SELECT 1 AS x UNION ALL SELECT NULL);
SELECT sum(number) FROM numbers(10);
SELECT sum(number < 1000 ? NULL : number) FROM numbers(10);

View File

@ -1,3 +1,7 @@
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
if(MAKE_STATIC_LIBRARIES)
set(MAX_LINKER_MEMORY 3500)
else()

View File

@ -30,7 +30,7 @@ void run(String part_path, String date_column, String dest_path)
{
std::shared_ptr<IDisk> disk = std::make_shared<DiskLocal>("local", "/", 0);
auto old_part_path = Poco::Path::forDirectory(part_path);
String old_part_name = old_part_path.directory(old_part_path.depth() - 1);
const String & old_part_name = old_part_path.directory(old_part_path.depth() - 1);
String old_part_path_str = old_part_path.toString();
auto part_info = MergeTreePartInfo::fromPartName(old_part_name, MergeTreeDataFormatVersion(0));

View File

@ -59,9 +59,9 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block
for (size_t i = 0; i < count; ++i)
{
long rand_result1 = rng();
long rand_result2 = rng();
long rand_result3 = rng();
uint64_t rand_result1 = rng();
uint64_t rand_result2 = rng();
uint64_t rand_result3 = rng();
size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43);
size_t offset;
@ -152,7 +152,7 @@ int mainImpl(int argc, char ** argv)
Stopwatch watch;
for (size_t i = 0; i < threads; ++i)
pool.scheduleOrThrowOnError(std::bind(thread, fd, mode, min_offset, max_offset, block_size, count));
pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, count); });
pool.wait();
fsync(fd);

View File

@ -13,6 +13,8 @@ int main(int, char **) { return 0; }
#include <Common/Exception.h>
#include <Common/ThreadPool.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadHelpers.h>
#include <stdio.h>
@ -52,10 +54,7 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block
for (size_t i = 0; i < buffers_count; ++i)
buffers[i] = Memory<>(block_size, sysconf(_SC_PAGESIZE));
drand48_data rand_data;
timespec times;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &times);
srand48_r(times.tv_nsec, &rand_data);
pcg64_fast rng(randomSeed());
size_t in_progress = 0;
size_t blocks_sent = 0;
@ -82,12 +81,9 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block
char * buf = buffers[i].data();
long rand_result1 = 0;
long rand_result2 = 0;
long rand_result3 = 0;
lrand48_r(&rand_data, &rand_result1);
lrand48_r(&rand_data, &rand_result2);
lrand48_r(&rand_data, &rand_result3);
uint64_t rand_result1 = rng();
uint64_t rand_result2 = rng();
uint64_t rand_result3 = rng();
size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43);
size_t offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size;
@ -172,7 +168,7 @@ int mainImpl(int argc, char ** argv)
Stopwatch watch;
for (size_t i = 0; i < threads_count; ++i)
pool.scheduleOrThrowOnError(std::bind(thread, fd, mode, min_offset, max_offset, block_size, buffers_count, count));
pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, buffers_count, count); });
pool.wait();
watch.stop();

View File

@ -113,9 +113,9 @@ int mainImpl(int argc, char ** argv)
polls[i].revents = 0;
++ops;
long rand_result1 = rng();
long rand_result2 = rng();
long rand_result3 = rng();
uint64_t rand_result1 = rng();
uint64_t rand_result2 = rng();
uint64_t rand_result3 = rng();
size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43);
size_t offset;

View File

@ -1,3 +1,6 @@
# Disable clang-tidy for protobuf generated files
set (CMAKE_CXX_CLANG_TIDY "")
add_compile_options(-Wno-zero-as-null-pointer-constant -Wno-array-bounds) # Protobuf generated files
if (USE_PROTOBUF)

View File

@ -102,7 +102,7 @@ std::unordered_map<std::string, Int64> getPartitionsNeedAdjustingBlockNumbers(
std::cout << "Shard: " << shard << std::endl;
std::vector<std::string> use_tables = tables.empty() ? getAllTables(zk, root, shard) : removeNotExistingTables(zk, root, shard, tables);
for (auto table : use_tables)
for (const auto & table : use_tables)
{
std::cout << "\tTable: " << table << std::endl;
std::string table_path = root + "/" + shard + "/" + table;
@ -121,7 +121,7 @@ std::unordered_map<std::string, Int64> getPartitionsNeedAdjustingBlockNumbers(
continue;
}
for (auto partition : partitions)
for (const auto & partition : partitions)
{
try
{
@ -199,7 +199,7 @@ void setCurrentBlockNumber(zkutil::ZooKeeper & zk, const std::string & path, Int
create_ephemeral_nodes(1); /// Firstly try to create just a single node.
/// Create other nodes in batches of 50 nodes.
while (current_block_number + 50 <= new_current_block_number)
while (current_block_number + 50 <= new_current_block_number) // NOLINT: clang-tidy thinks that the loop is infinite
create_ephemeral_nodes(50);
create_ephemeral_nodes(new_current_block_number - current_block_number);

View File

@ -97,10 +97,8 @@ int main(int argc, char ** argv)
bool watch = w == "w";
zkutil::EventPtr event = watch ? std::make_shared<Poco::Event>() : nullptr;
std::vector<std::string> v = zk.getChildren(path, nullptr, event);
for (size_t i = 0; i < v.size(); ++i)
{
std::cout << v[i] << std::endl;
}
for (const auto & child : v)
std::cout << child << std::endl;
if (watch)
waitForWatch(event);
}
@ -193,7 +191,7 @@ int main(int argc, char ** argv)
zk.set(path, data, version, &stat);
printStat(stat);
}
else if (cmd != "")
else if (!cmd.empty())
{
std::cout << "commands:\n";
std::cout << " q\n";

Some files were not shown because too many files have changed in this diff.